Mirror of https://github.com/torvalds/linux.git
Merge tag 'drm-intel-gt-next-2020-09-07' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
(Same content as drm-intel-gt-next-2020-09-04-3, S-o-b's added)

UAPI Changes:
(- Potential implicit changes from WW locking refactoring)

Cross-subsystem Changes:
(- WW locking changes should align the i915 locking more with others)

Driver Changes:
- MAJOR: Apply WW locking across the driver (Maarten)
- Reverts for 5 commits to make applying WW locking faster (Maarten)
- Disable preparser around invalidations on Tigerlake for non-RCS engines (Chris)
- Add missing dma_fence_put() for error case of syncobj timeline (Chris)
- Parse command buffer earlier in eb_relocate(slow) to facilitate backoff (Maarten)
- Pin engine before pinning all objects (Maarten)
- Rework intel_context pinning to do everything outside of pin_mutex (Maarten)
- Avoid tracking GEM context until registered (Cc: stable, Chris)
- Provide a fastpath for waiting on vma bindings (Chris)
- Fixes to preempt-to-busy mechanism (Chris)
- Distinguish the virtual breadcrumbs from the irq breadcrumbs (Chris)
- Switch to object allocations for page directories (Chris)
- Hold context/request reference while breadcrumbs are active (Chris)
- Make sure execbuffer always passes ww state to i915_vma_pin (Maarten)
- Code refactoring to facilitate use of WW locking (Maarten)
- Locking refactoring to use more granular locking (Maarten, Chris)
- Support for multiple pinned timelines per engine (Chris)
- Move complication of I915_GEM_THROTTLE to the ioctl from general code (Chris)
- Make active tracking/vma page-directory stash work preallocated (Chris)
- Avoid flushing submission tasklet too often (Chris)
- Reduce context termination list iteration guard to RCU (Chris)
- Reductions to locking contention (Chris)
- Fixes for issues found by CI (Chris)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <jlahtine@jlahtine-mobl.ger.corp.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200907130039.GA27766@jlahtine-mobl.ger.corp.intel.com
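The recurring change throughout the diff below is the conversion of ad-hoc object locking and pinning to a ww (wound/wait) acquire context with an -EDEADLK backoff-and-retry loop. The following is a minimal sketch of that pattern, assembled from helpers that appear verbatim in the diff (i915_gem_ww_ctx_init/backoff/fini, i915_gem_object_lock, i915_vma_pin_ww); the function name and the particular object/vma being pinned are placeholders for illustration, not code from this series.

/* Minimal sketch of the ww locking pattern this series applies driver-wide. */
static int example_pin_with_ww(struct drm_i915_gem_object *obj,
			       struct i915_vma *vma)
{
	struct i915_gem_ww_ctx ww;
	int err;

	/* true = use interruptible waits while acquiring locks */
	i915_gem_ww_ctx_init(&ww, true);
retry:
	/* Lock the object under the ww context so it can be backed off. */
	err = i915_gem_object_lock(obj, &ww);
	if (err)
		goto out;

	/* Pinning now takes the same ww context instead of locking ad hoc. */
	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);

out:
	if (err == -EDEADLK) {
		/* Deadlock against another ww context: drop all locks, then retry. */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}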
commit 1f4b2aca79
@@ -2311,7 +2311,7 @@ err:
 void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
-	i915_gem_object_lock(vma->obj);
+	i915_gem_object_lock(vma->obj, NULL);
 	if (flags & PLANE_HAS_FENCE)
 		i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
@@ -3451,7 +3451,7 @@ initial_plane_vma(struct drm_i915_private *i915,
 	if (IS_ERR(vma))
 		goto err_obj;
 
-	if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
+	if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
 		goto err_obj;
 
 	if (i915_gem_object_is_tiled(obj) &&
@@ -17194,7 +17194,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	if (!intel_fb->frontbuffer)
 		return -ENOMEM;
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	tiling = i915_gem_object_get_tiling(obj);
 	stride = i915_gem_object_get_stride(obj);
 	i915_gem_object_unlock(obj);
@@ -32,12 +32,13 @@ static void vma_clear_pages(struct i915_vma *vma)
 	vma->pages = NULL;
 }
 
-static int vma_bind(struct i915_address_space *vm,
-		    struct i915_vma *vma,
-		    enum i915_cache_level cache_level,
-		    u32 flags)
+static void vma_bind(struct i915_address_space *vm,
+		     struct i915_vm_pt_stash *stash,
+		     struct i915_vma *vma,
+		     enum i915_cache_level cache_level,
+		     u32 flags)
 {
-	return vm->vma_ops.bind_vma(vm, vma, cache_level, flags);
+	vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags);
 }
 
 static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
@@ -157,6 +158,7 @@ static void clear_pages_worker(struct work_struct *work)
 	struct clear_pages_work *w = container_of(work, typeof(*w), work);
 	struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
 	struct i915_vma *vma = w->sleeve->vma;
+	struct i915_gem_ww_ctx ww;
 	struct i915_request *rq;
 	struct i915_vma *batch;
 	int err = w->dma.error;
@@ -172,17 +174,20 @@ static void clear_pages_worker(struct work_struct *work)
 	obj->read_domains = I915_GEM_GPU_DOMAINS;
 	obj->write_domain = 0;
 
-	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (unlikely(err))
+	i915_gem_ww_ctx_init(&ww, false);
+	intel_engine_pm_get(w->ce->engine);
+retry:
+	err = intel_context_pin_ww(w->ce, &ww);
+	if (err)
 		goto out_signal;
 
-	batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
+	batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value);
 	if (IS_ERR(batch)) {
 		err = PTR_ERR(batch);
-		goto out_unpin;
+		goto out_ctx;
 	}
 
-	rq = intel_context_create_request(w->ce);
+	rq = i915_request_create(w->ce);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto out_batch;
@@ -224,9 +229,19 @@ out_request:
 	i915_request_add(rq);
 out_batch:
 	intel_emit_vma_release(w->ce, batch);
-out_unpin:
-	i915_vma_unpin(vma);
+out_ctx:
+	intel_context_unpin(w->ce);
 out_signal:
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
+
+	i915_vma_unpin(w->sleeve->vma);
+	intel_engine_pm_put(w->ce->engine);
+
 	if (unlikely(err)) {
 		dma_fence_set_error(&w->dma, err);
 		dma_fence_signal(&w->dma);
@@ -234,6 +249,44 @@ out_signal:
 	}
 }
 
+static int pin_wait_clear_pages_work(struct clear_pages_work *w,
+				     struct intel_context *ce)
+{
+	struct i915_vma *vma = w->sleeve->vma;
+	struct i915_gem_ww_ctx ww;
+	int err;
+
+	i915_gem_ww_ctx_init(&ww, false);
+retry:
+	err = i915_gem_object_lock(vma->obj, &ww);
+	if (err)
+		goto out;
+
+	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+	if (unlikely(err))
+		goto out;
+
+	err = i915_sw_fence_await_reservation(&w->wait,
+					      vma->obj->base.resv, NULL,
+					      true, 0, I915_FENCE_GFP);
+	if (err)
+		goto err_unpin_vma;
+
+	dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma);
+
+err_unpin_vma:
+	if (err)
+		i915_vma_unpin(vma);
+out:
+	if (err == -EDEADLK) {
+		err = i915_gem_ww_ctx_backoff(&ww);
+		if (!err)
+			goto retry;
+	}
+	i915_gem_ww_ctx_fini(&ww);
+	return err;
+}
+
 static int __i915_sw_fence_call
 clear_pages_work_notify(struct i915_sw_fence *fence,
 			enum i915_sw_fence_notify state)
@@ -287,17 +340,9 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
 	dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
 	i915_sw_fence_init(&work->wait, clear_pages_work_notify);
 
-	i915_gem_object_lock(obj);
-	err = i915_sw_fence_await_reservation(&work->wait,
-					      obj->base.resv, NULL, true, 0,
-					      I915_FENCE_GFP);
-	if (err < 0) {
+	err = pin_wait_clear_pages_work(work, ce);
+	if (err < 0)
 		dma_fence_set_error(&work->dma, err);
-	} else {
-		dma_resv_add_excl_fence(obj->base.resv, &work->dma);
-		err = 0;
-	}
-	i915_gem_object_unlock(obj);
 
 	dma_fence_get(&work->dma);
 	i915_sw_fence_commit(&work->wait);
@@ -439,29 +439,36 @@ static bool __cancel_engine(struct intel_engine_cs *engine)
 	return __reset_engine(engine);
 }
 
-static struct intel_engine_cs *__active_engine(struct i915_request *rq)
+static bool
+__active_engine(struct i915_request *rq, struct intel_engine_cs **active)
 {
 	struct intel_engine_cs *engine, *locked;
+	bool ret = false;
 
 	/*
 	 * Serialise with __i915_request_submit() so that it sees
 	 * is-banned?, or we know the request is already inflight.
+	 *
+	 * Note that rq->engine is unstable, and so we double
+	 * check that we have acquired the lock on the final engine.
 	 */
 	locked = READ_ONCE(rq->engine);
 	spin_lock_irq(&locked->active.lock);
 	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
 		spin_unlock(&locked->active.lock);
-		spin_lock(&engine->active.lock);
 		locked = engine;
+		spin_lock(&locked->active.lock);
 	}
 
-	engine = NULL;
-	if (i915_request_is_active(rq) && rq->fence.error != -EIO)
-		engine = rq->engine;
+	if (!i915_request_completed(rq)) {
+		if (i915_request_is_active(rq) && rq->fence.error != -EIO)
+			*active = locked;
+		ret = true;
+	}
 
 	spin_unlock_irq(&locked->active.lock);
 
-	return engine;
+	return ret;
 }
 
 static struct intel_engine_cs *active_engine(struct intel_context *ce)
@@ -472,17 +479,16 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
 	if (!ce->timeline)
 		return NULL;
 
-	mutex_lock(&ce->timeline->mutex);
-	list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
-		if (i915_request_completed(rq))
-			break;
+	rcu_read_lock();
+	list_for_each_entry_rcu(rq, &ce->timeline->requests, link) {
+		if (i915_request_is_active(rq) && i915_request_completed(rq))
+			continue;
 
 		/* Check with the backend if the request is inflight */
-		engine = __active_engine(rq);
-		if (engine)
+		if (__active_engine(rq, &engine))
 			break;
 	}
-	mutex_unlock(&ce->timeline->mutex);
+	rcu_read_unlock();
 
 	return engine;
 }
@@ -713,6 +719,7 @@ __create_context(struct drm_i915_private *i915)
 	ctx->i915 = i915;
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
 	mutex_init(&ctx->mutex);
+	INIT_LIST_HEAD(&ctx->link);
 
 	spin_lock_init(&ctx->stale.lock);
 	INIT_LIST_HEAD(&ctx->stale.engines);
@@ -740,10 +747,6 @@ __create_context(struct drm_i915_private *i915)
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
-	spin_lock(&i915->gem.contexts.lock);
-	list_add_tail(&ctx->link, &i915->gem.contexts.list);
-	spin_unlock(&i915->gem.contexts.lock);
-
 	return ctx;
 
 err_free:
@@ -889,7 +892,7 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
 		struct intel_timeline *timeline;
 
-		timeline = intel_timeline_create(&i915->gt, NULL);
+		timeline = intel_timeline_create(&i915->gt);
 		if (IS_ERR(timeline)) {
 			context_close(ctx);
 			return ERR_CAST(timeline);
@@ -931,6 +934,7 @@ static int gem_context_register(struct i915_gem_context *ctx,
 				struct drm_i915_file_private *fpriv,
 				u32 *id)
 {
+	struct drm_i915_private *i915 = ctx->i915;
 	struct i915_address_space *vm;
 	int ret;
 
@@ -949,8 +953,16 @@ static int gem_context_register(struct i915_gem_context *ctx,
 	/* And finally expose ourselves to userspace via the idr */
 	ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL);
 	if (ret)
-		put_pid(fetch_and_zero(&ctx->pid));
+		goto err_pid;
+
+	spin_lock(&i915->gem.contexts.lock);
+	list_add_tail(&ctx->link, &i915->gem.contexts.list);
+	spin_unlock(&i915->gem.contexts.lock);
+
+	return 0;
 
+err_pid:
+	put_pid(fetch_and_zero(&ctx->pid));
 	return ret;
 }
 
@@ -1094,6 +1106,7 @@ I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
 static int context_barrier_task(struct i915_gem_context *ctx,
 				intel_engine_mask_t engines,
 				bool (*skip)(struct intel_context *ce, void *data),
+				int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data),
 				int (*emit)(struct i915_request *rq, void *data),
 				void (*task)(void *data),
 				void *data)
@@ -1101,6 +1114,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	struct context_barrier_task *cb;
 	struct i915_gem_engines_iter it;
 	struct i915_gem_engines *e;
+	struct i915_gem_ww_ctx ww;
 	struct intel_context *ce;
 	int err = 0;
 
@@ -1138,10 +1152,21 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (skip && skip(ce, data))
 			continue;
 
-		rq = intel_context_create_request(ce);
+		i915_gem_ww_ctx_init(&ww, true);
+retry:
+		err = intel_context_pin_ww(ce, &ww);
+		if (err)
+			goto err;
+
+		if (pin)
+			err = pin(ce, &ww, data);
+		if (err)
+			goto err_unpin;
+
+		rq = i915_request_create(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
-			break;
+			goto err_unpin;
 		}
 
 		err = 0;
@@ -1151,6 +1176,16 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		err = i915_active_add_request(&cb->base, rq);
 
 		i915_request_add(rq);
+err_unpin:
+		intel_context_unpin(ce);
+err:
+		if (err == -EDEADLK) {
+			err = i915_gem_ww_ctx_backoff(&ww);
+			if (!err)
+				goto retry;
+		}
+		i915_gem_ww_ctx_fini(&ww);
+
 		if (err)
 			break;
 	}
@@ -1206,6 +1241,17 @@ static void set_ppgtt_barrier(void *data)
 	i915_vm_close(old);
 }
 
+static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data)
+{
+	struct i915_address_space *vm = ce->vm;
+
+	if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
+		/* ppGTT is not part of the legacy context image */
+		return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);
+
+	return 0;
+}
+
 static int emit_ppgtt_update(struct i915_request *rq, void *data)
 {
 	struct i915_address_space *vm = rq->context->vm;
@@ -1262,20 +1308,10 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
 
 static bool skip_ppgtt_update(struct intel_context *ce, void *data)
 {
-	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
-		return true;
-
 	if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
-		return false;
-
-	if (!atomic_read(&ce->pin_count))
-		return true;
-
-	/* ppGTT is not part of the legacy context image */
-	if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm)))
-		return true;
-
-	return false;
+		return !ce->state;
+	else
+		return !atomic_read(&ce->pin_count);
 }
 
 static int set_ppgtt(struct drm_i915_file_private *file_priv,
@@ -1326,6 +1362,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	 */
 	err = context_barrier_task(ctx, ALL_ENGINES,
 				   skip_ppgtt_update,
+				   pin_ppgtt_update,
 				   emit_ppgtt_update,
 				   set_ppgtt_barrier,
 				   old);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = i915_gem_object_lock_interruptible(obj);
|
||||
err = i915_gem_object_lock_interruptible(obj, NULL);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
@ -149,7 +149,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = i915_gem_object_lock_interruptible(obj);
|
||||
err = i915_gem_object_lock_interruptible(obj, NULL);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
|
@ -32,11 +32,17 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
|
||||
if (!i915_gem_object_is_framebuffer(obj))
|
||||
return;
|
||||
|
||||
i915_gem_object_lock(obj);
|
||||
i915_gem_object_lock(obj, NULL);
|
||||
__i915_gem_object_flush_for_display(obj);
|
||||
i915_gem_object_unlock(obj);
|
||||
}
|
||||
|
||||
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
if (i915_gem_object_is_framebuffer(obj))
|
||||
__i915_gem_object_flush_for_display(obj);
|
||||
}
|
||||
|
||||
/**
|
||||
* Moves a single object to the WC read, and possibly write domain.
|
||||
* @obj: object to act on
|
||||
@ -197,18 +203,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = i915_gem_object_lock_interruptible(obj);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Always invalidate stale cachelines */
|
||||
if (obj->cache_level != cache_level) {
|
||||
i915_gem_object_set_cache_coherency(obj, cache_level);
|
||||
obj->cache_dirty = true;
|
||||
}
|
||||
|
||||
i915_gem_object_unlock(obj);
|
||||
|
||||
/* The cache-level will be applied when each vma is rebound. */
|
||||
return i915_gem_object_unbind(obj,
|
||||
I915_GEM_OBJECT_UNBIND_ACTIVE |
|
||||
@ -293,7 +293,12 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = i915_gem_object_lock_interruptible(obj, NULL);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = i915_gem_object_set_cache_level(obj, level);
|
||||
i915_gem_object_unlock(obj);
|
||||
|
||||
out:
|
||||
i915_gem_object_put(obj);
|
||||
@ -313,6 +318,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct i915_gem_ww_ctx ww;
|
||||
struct i915_vma *vma;
|
||||
int ret;
|
||||
|
||||
@ -320,6 +326,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
|
||||
if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
i915_gem_ww_ctx_init(&ww, true);
|
||||
retry:
|
||||
ret = i915_gem_object_lock(obj, &ww);
|
||||
if (ret)
|
||||
goto err;
|
||||
/*
|
||||
* The display engine is not coherent with the LLC cache on gen6. As
|
||||
* a result, we make sure that the pinning that is about to occur is
|
||||
@ -334,7 +345,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
|
||||
HAS_WT(i915) ?
|
||||
I915_CACHE_WT : I915_CACHE_NONE);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* As the user may map the buffer once pinned in the display plane
|
||||
@ -347,18 +358,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
|
||||
vma = ERR_PTR(-ENOSPC);
|
||||
if ((flags & PIN_MAPPABLE) == 0 &&
|
||||
(!view || view->type == I915_GGTT_VIEW_NORMAL))
|
||||
vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
|
||||
flags |
|
||||
PIN_MAPPABLE |
|
||||
PIN_NONBLOCK);
|
||||
if (IS_ERR(vma))
|
||||
vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
|
||||
if (IS_ERR(vma))
|
||||
return vma;
|
||||
vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
|
||||
flags | PIN_MAPPABLE |
|
||||
PIN_NONBLOCK);
|
||||
if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
|
||||
vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
|
||||
alignment, flags);
|
||||
if (IS_ERR(vma)) {
|
||||
ret = PTR_ERR(vma);
|
||||
goto err;
|
||||
}
|
||||
|
||||
vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
|
||||
|
||||
i915_gem_object_flush_if_display(obj);
|
||||
i915_gem_object_flush_if_display_locked(obj);
|
||||
|
||||
err:
|
||||
if (ret == -EDEADLK) {
|
||||
ret = i915_gem_ww_ctx_backoff(&ww);
|
||||
if (!ret)
|
||||
goto retry;
|
||||
}
|
||||
i915_gem_ww_ctx_fini(&ww);
|
||||
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
return vma;
|
||||
}
|
||||
@ -536,7 +560,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = i915_gem_object_lock_interruptible(obj);
|
||||
err = i915_gem_object_lock_interruptible(obj, NULL);
|
||||
if (err)
|
||||
goto out_unpin;
|
||||
|
||||
@ -576,19 +600,17 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
|
||||
if (!i915_gem_object_has_struct_page(obj))
|
||||
return -ENODEV;
|
||||
|
||||
ret = i915_gem_object_lock_interruptible(obj);
|
||||
if (ret)
|
||||
return ret;
|
||||
assert_object_held(obj);
|
||||
|
||||
ret = i915_gem_object_wait(obj,
|
||||
I915_WAIT_INTERRUPTIBLE,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
if (ret)
|
||||
goto err_unlock;
|
||||
return ret;
|
||||
|
||||
ret = i915_gem_object_pin_pages(obj);
|
||||
if (ret)
|
||||
goto err_unlock;
|
||||
return ret;
|
||||
|
||||
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
|
||||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
|
||||
@ -616,8 +638,6 @@ out:
|
||||
|
||||
err_unpin:
|
||||
i915_gem_object_unpin_pages(obj);
|
||||
err_unlock:
|
||||
i915_gem_object_unlock(obj);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -630,20 +650,18 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
|
||||
if (!i915_gem_object_has_struct_page(obj))
|
||||
return -ENODEV;
|
||||
|
||||
ret = i915_gem_object_lock_interruptible(obj);
|
||||
if (ret)
|
||||
return ret;
|
||||
assert_object_held(obj);
|
||||
|
||||
ret = i915_gem_object_wait(obj,
|
||||
I915_WAIT_INTERRUPTIBLE |
|
||||
I915_WAIT_ALL,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
if (ret)
|
||||
goto err_unlock;
|
||||
return ret;
|
||||
|
||||
ret = i915_gem_object_pin_pages(obj);
|
||||
if (ret)
|
||||
goto err_unlock;
|
||||
return ret;
|
||||
|
||||
if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
|
||||
!static_cpu_has(X86_FEATURE_CLFLUSH)) {
|
||||
@ -680,7 +698,5 @@ out:
|
||||
|
||||
err_unpin:
|
||||
i915_gem_object_unpin_pages(obj);
|
||||
err_unlock:
|
||||
i915_gem_object_unlock(obj);
|
||||
return ret;
|
||||
}
|
||||
|
[Diff for one file suppressed because it is too large.]
@ -283,37 +283,46 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
|
||||
struct intel_runtime_pm *rpm = &i915->runtime_pm;
|
||||
struct i915_ggtt *ggtt = &i915->ggtt;
|
||||
bool write = area->vm_flags & VM_WRITE;
|
||||
struct i915_gem_ww_ctx ww;
|
||||
intel_wakeref_t wakeref;
|
||||
struct i915_vma *vma;
|
||||
pgoff_t page_offset;
|
||||
int srcu;
|
||||
int ret;
|
||||
|
||||
/* Sanity check that we allow writing into this object */
|
||||
if (i915_gem_object_is_readonly(obj) && write)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
/* We don't use vmf->pgoff since that has the fake offset */
|
||||
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
|
||||
|
||||
trace_i915_gem_object_fault(obj, page_offset, true, write);
|
||||
|
||||
ret = i915_gem_object_pin_pages(obj);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
wakeref = intel_runtime_pm_get(rpm);
|
||||
|
||||
ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
|
||||
i915_gem_ww_ctx_init(&ww, true);
|
||||
retry:
|
||||
ret = i915_gem_object_lock(obj, &ww);
|
||||
if (ret)
|
||||
goto err_rpm;
|
||||
|
||||
/* Sanity check that we allow writing into this object */
|
||||
if (i915_gem_object_is_readonly(obj) && write) {
|
||||
ret = -EFAULT;
|
||||
goto err_rpm;
|
||||
}
|
||||
|
||||
ret = i915_gem_object_pin_pages(obj);
|
||||
if (ret)
|
||||
goto err_rpm;
|
||||
|
||||
ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
|
||||
if (ret)
|
||||
goto err_pages;
|
||||
|
||||
/* Now pin it into the GTT as needed */
|
||||
vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
|
||||
PIN_MAPPABLE |
|
||||
PIN_NONBLOCK /* NOWARN */ |
|
||||
PIN_NOEVICT);
|
||||
if (IS_ERR(vma)) {
|
||||
vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
|
||||
PIN_MAPPABLE |
|
||||
PIN_NONBLOCK /* NOWARN */ |
|
||||
PIN_NOEVICT);
|
||||
if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
|
||||
/* Use a partial view if it is bigger than available space */
|
||||
struct i915_ggtt_view view =
|
||||
compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
|
||||
@ -328,11 +337,11 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
|
||||
* all hope that the hardware is able to track future writes.
|
||||
*/
|
||||
|
||||
vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
|
||||
if (IS_ERR(vma)) {
|
||||
vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
|
||||
if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
|
||||
flags = PIN_MAPPABLE;
|
||||
view.type = I915_GGTT_VIEW_PARTIAL;
|
||||
vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
|
||||
vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
|
||||
}
|
||||
|
||||
/* The entire mappable GGTT is pinned? Unexpected! */
|
||||
@ -389,10 +398,16 @@ err_unpin:
|
||||
__i915_vma_unpin(vma);
|
||||
err_reset:
|
||||
intel_gt_reset_unlock(ggtt->vm.gt, srcu);
|
||||
err_rpm:
|
||||
intel_runtime_pm_put(rpm, wakeref);
|
||||
err_pages:
|
||||
i915_gem_object_unpin_pages(obj);
|
||||
err:
|
||||
err_rpm:
|
||||
if (ret == -EDEADLK) {
|
||||
ret = i915_gem_ww_ctx_backoff(&ww);
|
||||
if (!ret)
|
||||
goto retry;
|
||||
}
|
||||
i915_gem_ww_ctx_fini(&ww);
|
||||
intel_runtime_pm_put(rpm, wakeref);
|
||||
return i915_error_to_vmf_fault(ret);
|
||||
}
|
||||
|
||||
|
@@ -110,9 +110,39 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
 
 #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
 
-static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
+static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
+					 struct i915_gem_ww_ctx *ww,
+					 bool intr)
 {
-	dma_resv_lock(obj->base.resv, NULL);
+	int ret;
+
+	if (intr)
+		ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
+	else
+		ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
+
+	if (!ret && ww)
+		list_add_tail(&obj->obj_link, &ww->obj_list);
+	if (ret == -EALREADY)
+		ret = 0;
+
+	if (ret == -EDEADLK)
+		ww->contended = obj;
+
+	return ret;
+}
+
+static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
+				       struct i915_gem_ww_ctx *ww)
+{
+	return __i915_gem_object_lock(obj, ww, ww && ww->intr);
+}
+
+static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
+						     struct i915_gem_ww_ctx *ww)
+{
+	WARN_ON(ww && !ww->intr);
+	return __i915_gem_object_lock(obj, ww, true);
 }
 
 static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
@@ -120,12 +150,6 @@ static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj)
 	return dma_resv_trylock(obj->base.resv);
 }
 
-static inline int
-i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
-{
-	return dma_resv_lock_interruptible(obj->base.resv, NULL);
-}
-
 static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
 {
 	dma_resv_unlock(obj->base.resv);
@@ -412,7 +436,6 @@ static inline void
 i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
 {
 	i915_gem_object_unpin_pages(obj);
-	i915_gem_object_unlock(obj);
 }
 
 static inline struct intel_engine_cs *
@@ -435,6 +458,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
 void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
 					 unsigned int cache_level);
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
+void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
 
 int __must_check
 i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
@ -14,6 +14,7 @@
|
||||
|
||||
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
|
||||
struct i915_vma *vma,
|
||||
struct i915_gem_ww_ctx *ww,
|
||||
u32 value)
|
||||
{
|
||||
struct drm_i915_private *i915 = ce->vm->i915;
|
||||
@ -39,10 +40,24 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
|
||||
goto out_pm;
|
||||
}
|
||||
|
||||
err = i915_gem_object_lock(pool->obj, ww);
|
||||
if (err)
|
||||
goto out_put;
|
||||
|
||||
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
|
||||
if (unlikely(err))
|
||||
goto out_put;
|
||||
|
||||
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
|
||||
if (IS_ERR(cmd)) {
|
||||
err = PTR_ERR(cmd);
|
||||
goto out_put;
|
||||
goto out_unpin;
|
||||
}
|
||||
|
||||
rem = vma->size;
|
||||
@ -84,19 +99,11 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
|
||||
|
||||
intel_gt_chipset_flush(ce->vm->gt);
|
||||
|
||||
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
err = i915_vma_pin(batch, 0, 0, PIN_USER);
|
||||
if (unlikely(err))
|
||||
goto out_put;
|
||||
|
||||
batch->private = pool;
|
||||
return batch;
|
||||
|
||||
out_unpin:
|
||||
i915_vma_unpin(batch);
|
||||
out_put:
|
||||
intel_gt_buffer_pool_put(pool);
|
||||
out_pm:
|
||||
@ -108,11 +115,9 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
|
||||
{
|
||||
int err;
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_request_await_object(rq, vma->obj, false);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, 0);
|
||||
i915_vma_unlock(vma);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
@ -141,6 +146,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
|
||||
struct intel_context *ce,
|
||||
u32 value)
|
||||
{
|
||||
struct i915_gem_ww_ctx ww;
|
||||
struct i915_request *rq;
|
||||
struct i915_vma *batch;
|
||||
struct i915_vma *vma;
|
||||
@ -150,17 +156,28 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
|
||||
if (IS_ERR(vma))
|
||||
return PTR_ERR(vma);
|
||||
|
||||
err = i915_vma_pin(vma, 0, 0, PIN_USER);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
i915_gem_ww_ctx_init(&ww, true);
|
||||
intel_engine_pm_get(ce->engine);
|
||||
retry:
|
||||
err = i915_gem_object_lock(obj, &ww);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
batch = intel_emit_vma_fill_blt(ce, vma, value);
|
||||
err = intel_context_pin_ww(ce, &ww);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
|
||||
if (err)
|
||||
goto out_ctx;
|
||||
|
||||
batch = intel_emit_vma_fill_blt(ce, vma, &ww, value);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto out_unpin;
|
||||
goto out_vma;
|
||||
}
|
||||
|
||||
rq = intel_context_create_request(ce);
|
||||
rq = i915_request_create(ce);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto out_batch;
|
||||
@ -170,11 +187,9 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
|
||||
if (unlikely(err))
|
||||
goto out_request;
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = move_obj_to_gpu(vma->obj, rq, true);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
if (unlikely(err))
|
||||
goto out_request;
|
||||
|
||||
@ -193,8 +208,18 @@ out_request:
|
||||
i915_request_add(rq);
|
||||
out_batch:
|
||||
intel_emit_vma_release(ce, batch);
|
||||
out_unpin:
|
||||
out_vma:
|
||||
i915_vma_unpin(vma);
|
||||
out_ctx:
|
||||
intel_context_unpin(ce);
|
||||
out:
|
||||
if (err == -EDEADLK) {
|
||||
err = i915_gem_ww_ctx_backoff(&ww);
|
||||
if (!err)
|
||||
goto retry;
|
||||
}
|
||||
i915_gem_ww_ctx_fini(&ww);
|
||||
intel_engine_pm_put(ce->engine);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -210,6 +235,7 @@ static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
|
||||
}
|
||||
|
||||
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
|
||||
struct i915_gem_ww_ctx *ww,
|
||||
struct i915_vma *src,
|
||||
struct i915_vma *dst)
|
||||
{
|
||||
@ -236,10 +262,24 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
|
||||
goto out_pm;
|
||||
}
|
||||
|
||||
err = i915_gem_object_lock(pool->obj, ww);
|
||||
if (err)
|
||||
goto out_put;
|
||||
|
||||
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
|
||||
if (unlikely(err))
|
||||
goto out_put;
|
||||
|
||||
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
|
||||
if (IS_ERR(cmd)) {
|
||||
err = PTR_ERR(cmd);
|
||||
goto out_put;
|
||||
goto out_unpin;
|
||||
}
|
||||
|
||||
rem = src->size;
|
||||
@ -296,20 +336,11 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
|
||||
i915_gem_object_unpin_map(pool->obj);
|
||||
|
||||
intel_gt_chipset_flush(ce->vm->gt);
|
||||
|
||||
batch = i915_vma_instance(pool->obj, ce->vm, NULL);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
err = i915_vma_pin(batch, 0, 0, PIN_USER);
|
||||
if (unlikely(err))
|
||||
goto out_put;
|
||||
|
||||
batch->private = pool;
|
||||
return batch;
|
||||
|
||||
out_unpin:
|
||||
i915_vma_unpin(batch);
|
||||
out_put:
|
||||
intel_gt_buffer_pool_put(pool);
|
||||
out_pm:
|
||||
@ -321,10 +352,9 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
|
||||
struct drm_i915_gem_object *dst,
|
||||
struct intel_context *ce)
|
||||
{
|
||||
struct drm_gem_object *objs[] = { &src->base, &dst->base };
|
||||
struct i915_address_space *vm = ce->vm;
|
||||
struct i915_vma *vma[2], *batch;
|
||||
struct ww_acquire_ctx acquire;
|
||||
struct i915_gem_ww_ctx ww;
|
||||
struct i915_request *rq;
|
||||
int err, i;
|
||||
|
||||
@ -332,25 +362,36 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
|
||||
if (IS_ERR(vma[0]))
|
||||
return PTR_ERR(vma[0]);
|
||||
|
||||
err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
vma[1] = i915_vma_instance(dst, vm, NULL);
|
||||
if (IS_ERR(vma[1]))
|
||||
goto out_unpin_src;
|
||||
return PTR_ERR(vma);
|
||||
|
||||
err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
|
||||
i915_gem_ww_ctx_init(&ww, true);
|
||||
intel_engine_pm_get(ce->engine);
|
||||
retry:
|
||||
err = i915_gem_object_lock(src, &ww);
|
||||
if (!err)
|
||||
err = i915_gem_object_lock(dst, &ww);
|
||||
if (!err)
|
||||
err = intel_context_pin_ww(ce, &ww);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER);
|
||||
if (err)
|
||||
goto out_ctx;
|
||||
|
||||
err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER);
|
||||
if (unlikely(err))
|
||||
goto out_unpin_src;
|
||||
|
||||
batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
|
||||
batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto out_unpin_dst;
|
||||
}
|
||||
|
||||
rq = intel_context_create_request(ce);
|
||||
rq = i915_request_create(ce);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto out_batch;
|
||||
@ -360,14 +401,10 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
|
||||
if (unlikely(err))
|
||||
goto out_request;
|
||||
|
||||
err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
|
||||
if (unlikely(err))
|
||||
goto out_request;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vma); i++) {
|
||||
err = move_obj_to_gpu(vma[i]->obj, rq, i);
|
||||
if (unlikely(err))
|
||||
goto out_unlock;
|
||||
goto out_request;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vma); i++) {
|
||||
@ -375,20 +412,19 @@ int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
|
||||
|
||||
err = i915_vma_move_to_active(vma[i], rq, flags);
|
||||
if (unlikely(err))
|
||||
goto out_unlock;
|
||||
goto out_request;
|
||||
}
|
||||
|
||||
if (rq->engine->emit_init_breadcrumb) {
|
||||
err = rq->engine->emit_init_breadcrumb(rq);
|
||||
if (unlikely(err))
|
||||
goto out_unlock;
|
||||
goto out_request;
|
||||
}
|
||||
|
||||
err = rq->engine->emit_bb_start(rq,
|
||||
batch->node.start, batch->node.size,
|
||||
0);
|
||||
out_unlock:
|
||||
drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
|
||||
|
||||
out_request:
|
||||
if (unlikely(err))
|
||||
i915_request_set_error_once(rq, err);
|
||||
@ -400,6 +436,16 @@ out_unpin_dst:
|
||||
i915_vma_unpin(vma[1]);
|
||||
out_unpin_src:
|
||||
i915_vma_unpin(vma[0]);
|
||||
out_ctx:
|
||||
intel_context_unpin(ce);
|
||||
out:
|
||||
if (err == -EDEADLK) {
|
||||
err = i915_gem_ww_ctx_backoff(&ww);
|
||||
if (!err)
|
||||
goto retry;
|
||||
}
|
||||
i915_gem_ww_ctx_fini(&ww);
|
||||
intel_engine_pm_put(ce->engine);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@@ -13,12 +13,15 @@
 #include "i915_vma.h"
 
 struct drm_i915_gem_object;
+struct i915_gem_ww_ctx;
 
 struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 					 struct i915_vma *vma,
+					 struct i915_gem_ww_ctx *ww,
 					 u32 value);
 
 struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
+					 struct i915_gem_ww_ctx *ww,
 					 struct i915_vma *src,
 					 struct i915_vma *dst);
 
@@ -123,6 +123,15 @@ struct drm_i915_gem_object {
 	struct list_head lut_list;
 	spinlock_t lut_lock; /* guards lut_list */
 
+	/**
+	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
+	 *
+	 * When we lock this object through i915_gem_object_lock() with a
+	 * context, we add it to the list to ensure we can unlock everything
+	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
+	 */
+	struct list_head obj_link;
+
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
 	union {
@@ -282,6 +291,7 @@ struct drm_i915_gem_object {
 	} userptr;
 
 	unsigned long scratch;
+	u64 encode;
 
 	void *gvt_info;
 };
@@ -84,7 +84,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
 
 	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	drm_WARN_ON(&i915->drm,
 		    i915_gem_object_set_to_gtt_domain(obj, false));
 	i915_gem_object_unlock(obj);
@ -9,6 +9,7 @@
|
||||
#include <drm/drm_file.h>
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gem_context.h"
|
||||
#include "i915_gem_ioctls.h"
|
||||
#include "i915_gem_object.h"
|
||||
|
||||
@ -35,9 +36,10 @@ int
|
||||
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *file)
|
||||
{
|
||||
const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
|
||||
struct drm_i915_file_private *file_priv = file->driver_priv;
|
||||
unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
|
||||
struct i915_request *request, *target = NULL;
|
||||
struct i915_gem_context *ctx;
|
||||
unsigned long idx;
|
||||
long ret;
|
||||
|
||||
/* ABI: return -EIO if already wedged */
|
||||
@ -45,27 +47,54 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spin_lock(&file_priv->mm.lock);
|
||||
list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
|
||||
if (time_after_eq(request->emitted_jiffies, recent_enough))
|
||||
break;
|
||||
rcu_read_lock();
|
||||
xa_for_each(&file_priv->context_xa, idx, ctx) {
|
||||
struct i915_gem_engines_iter it;
|
||||
struct intel_context *ce;
|
||||
|
||||
if (target && xchg(&target->file_priv, NULL))
|
||||
list_del(&target->client_link);
|
||||
if (!kref_get_unless_zero(&ctx->ref))
|
||||
continue;
|
||||
rcu_read_unlock();
|
||||
|
||||
target = request;
|
||||
for_each_gem_engine(ce,
|
||||
i915_gem_context_lock_engines(ctx),
|
||||
it) {
|
||||
struct i915_request *rq, *target = NULL;
|
||||
|
||||
if (!ce->timeline)
|
||||
continue;
|
||||
|
||||
mutex_lock(&ce->timeline->mutex);
|
||||
list_for_each_entry_reverse(rq,
|
||||
&ce->timeline->requests,
|
||||
link) {
|
||||
if (i915_request_completed(rq))
|
||||
break;
|
||||
|
||||
if (time_after(rq->emitted_jiffies,
|
||||
recent_enough))
|
||||
continue;
|
||||
|
||||
target = i915_request_get(rq);
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&ce->timeline->mutex);
|
||||
if (!target)
|
||||
continue;
|
||||
|
||||
ret = i915_request_wait(target,
|
||||
I915_WAIT_INTERRUPTIBLE,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
i915_request_put(target);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
i915_gem_context_unlock_engines(ctx);
|
||||
i915_gem_context_put(ctx);
|
||||
|
||||
rcu_read_lock();
|
||||
}
|
||||
if (target)
|
||||
i915_request_get(target);
|
||||
spin_unlock(&file_priv->mm.lock);
|
||||
|
||||
if (!target)
|
||||
return 0;
|
||||
|
||||
ret = i915_request_wait(target,
|
||||
I915_WAIT_INTERRUPTIBLE,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
i915_request_put(target);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret < 0 ? ret : 0;
|
||||
}
|
||||
|
@@ -249,7 +249,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 	 * whilst executing a fenced command for an untiled object.
 	 */
 
-	i915_gem_object_lock(obj);
+	i915_gem_object_lock(obj, NULL);
 	if (i915_gem_object_is_framebuffer(obj)) {
 		i915_gem_object_unlock(obj);
 		return -EBUSY;
@ -393,7 +393,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg)
|
||||
*/
|
||||
|
||||
for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
|
||||
unsigned int combination = 0;
|
||||
unsigned int combination = SZ_4K; /* Required for ppGTT */
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
|
||||
if (i & BIT(j))
|
||||
@ -947,7 +947,7 @@ static int gpu_write(struct intel_context *ce,
|
||||
{
|
||||
int err;
|
||||
|
||||
i915_gem_object_lock(vma->obj);
|
||||
i915_gem_object_lock(vma->obj, NULL);
|
||||
err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
|
||||
i915_gem_object_unlock(vma->obj);
|
||||
if (err)
|
||||
@ -964,9 +964,10 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
|
||||
unsigned long n;
|
||||
int err;
|
||||
|
||||
i915_gem_object_lock(obj, NULL);
|
||||
err = i915_gem_object_prepare_read(obj, &needs_flush);
|
||||
if (err)
|
||||
return err;
|
||||
goto err_unlock;
|
||||
|
||||
for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
|
||||
u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
|
||||
@ -986,6 +987,8 @@ __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
|
||||
}
|
||||
|
||||
i915_gem_object_finish_access(obj);
|
||||
err_unlock:
|
||||
i915_gem_object_unlock(obj);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
@ -75,7 +75,7 @@ static int __igt_client_fill(struct intel_engine_cs *engine)
|
||||
if (err)
|
||||
goto err_unpin;
|
||||
|
||||
i915_gem_object_lock(obj);
|
||||
i915_gem_object_lock(obj, NULL);
|
||||
err = i915_gem_object_set_to_cpu_domain(obj, false);
|
||||
i915_gem_object_unlock(obj);
|
||||
if (err)
|
||||
|
@ -27,9 +27,10 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
|
||||
u32 *cpu;
|
||||
int err;
|
||||
|
||||
i915_gem_object_lock(ctx->obj, NULL);
|
||||
err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
|
||||
if (err)
|
||||
return err;
|
||||
goto out;
|
||||
|
||||
page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
|
||||
map = kmap_atomic(page);
|
||||
@ -46,7 +47,9 @@ static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
|
||||
kunmap_atomic(map);
|
||||
i915_gem_object_finish_access(ctx->obj);
|
||||
|
||||
return 0;
|
||||
out:
|
||||
i915_gem_object_unlock(ctx->obj);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
|
||||
@ -57,9 +60,10 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
|
||||
u32 *cpu;
|
||||
int err;
|
||||
|
||||
i915_gem_object_lock(ctx->obj, NULL);
|
||||
err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
|
||||
if (err)
|
||||
return err;
|
||||
goto out;
|
||||
|
||||
page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
|
||||
map = kmap_atomic(page);
|
||||
@ -73,7 +77,9 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
|
||||
kunmap_atomic(map);
|
||||
i915_gem_object_finish_access(ctx->obj);
|
||||
|
||||
return 0;
|
||||
out:
|
||||
i915_gem_object_unlock(ctx->obj);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
|
||||
@ -82,7 +88,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
|
||||
u32 __iomem *map;
|
||||
int err = 0;
|
||||
|
||||
i915_gem_object_lock(ctx->obj);
|
||||
i915_gem_object_lock(ctx->obj, NULL);
|
||||
err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
|
||||
i915_gem_object_unlock(ctx->obj);
|
||||
if (err)
|
||||
@ -115,7 +121,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
|
||||
u32 __iomem *map;
|
||||
int err = 0;
|
||||
|
||||
i915_gem_object_lock(ctx->obj);
|
||||
i915_gem_object_lock(ctx->obj, NULL);
|
||||
err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
|
||||
i915_gem_object_unlock(ctx->obj);
|
||||
if (err)
|
||||
@ -147,7 +153,7 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v)
|
||||
u32 *map;
|
||||
int err;
|
||||
|
||||
i915_gem_object_lock(ctx->obj);
|
||||
i915_gem_object_lock(ctx->obj, NULL);
|
||||
err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
|
||||
i915_gem_object_unlock(ctx->obj);
|
||||
if (err)
|
||||
@ -170,7 +176,7 @@ static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
|
||||
u32 *map;
|
||||
int err;
|
||||
|
||||
i915_gem_object_lock(ctx->obj);
|
||||
i915_gem_object_lock(ctx->obj, NULL);
|
||||
err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
|
||||
i915_gem_object_unlock(ctx->obj);
|
||||
if (err)
|
||||
@ -193,27 +199,27 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
|
||||
u32 *cs;
|
||||
int err;
|
||||
|
||||
i915_gem_object_lock(ctx->obj);
|
||||
i915_gem_object_lock(ctx->obj, NULL);
|
||||
err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
|
||||
i915_gem_object_unlock(ctx->obj);
|
||||
if (err)
|
||||
return err;
|
||||
goto out_unlock;
|
||||
|
||||
vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
|
||||
if (IS_ERR(vma))
|
||||
return PTR_ERR(vma);
|
||||
if (IS_ERR(vma)) {
|
||||
err = PTR_ERR(vma);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
rq = intel_engine_create_kernel_request(ctx->engine);
|
||||
if (IS_ERR(rq)) {
|
||||
i915_vma_unpin(vma);
|
||||
return PTR_ERR(rq);
|
||||
err = PTR_ERR(rq);
|
||||
goto out_unpin;
|
||||
}
|
||||
|
||||
cs = intel_ring_begin(rq, 4);
|
||||
if (IS_ERR(cs)) {
|
||||
i915_request_add(rq);
|
||||
i915_vma_unpin(vma);
|
||||
return PTR_ERR(cs);
|
||||
err = PTR_ERR(cs);
|
||||
goto out_rq;
|
||||
}
|
||||
|
||||
if (INTEL_GEN(ctx->engine->i915) >= 8) {
|
||||
@ -234,14 +240,16 @@ static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
|
||||
}
|
||||
intel_ring_advance(rq, cs);
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_request_await_object(rq, vma->obj, true);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
i915_vma_unpin(vma);
|
||||
|
||||
out_rq:
|
||||
i915_request_add(rq);
|
||||
out_unpin:
|
||||
i915_vma_unpin(vma);
|
||||
out_unlock:
|
||||
i915_gem_object_unlock(ctx->obj);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
@ -461,9 +461,10 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
|
||||
unsigned int n, m, need_flush;
|
||||
int err;
|
||||
|
||||
i915_gem_object_lock(obj, NULL);
|
||||
err = i915_gem_object_prepare_write(obj, &need_flush);
|
||||
if (err)
|
||||
return err;
|
||||
goto out;
|
||||
|
||||
for (n = 0; n < real_page_count(obj); n++) {
|
||||
u32 *map;
|
||||
@ -479,7 +480,9 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
|
||||
i915_gem_object_finish_access(obj);
|
||||
obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
|
||||
obj->write_domain = 0;
|
||||
return 0;
|
||||
out:
|
||||
i915_gem_object_unlock(obj);
|
||||
return err;
|
||||
}
|
||||
|
||||
static noinline int cpu_check(struct drm_i915_gem_object *obj,
|
||||
@ -488,9 +491,10 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
|
||||
unsigned int n, m, needs_flush;
|
||||
int err;
|
||||
|
||||
i915_gem_object_lock(obj, NULL);
|
||||
err = i915_gem_object_prepare_read(obj, &needs_flush);
|
||||
if (err)
|
||||
return err;
|
||||
goto out_unlock;
|
||||
|
||||
for (n = 0; n < real_page_count(obj); n++) {
|
||||
u32 *map;
|
||||
@ -527,6 +531,8 @@ out_unmap:
|
||||
}
|
||||
|
||||
i915_gem_object_finish_access(obj);
|
||||
out_unlock:
|
||||
i915_gem_object_unlock(obj);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -887,24 +893,15 @@ out_file:
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
|
||||
static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma)
|
||||
{
|
||||
struct drm_i915_gem_object *obj;
|
||||
u32 *cmd;
|
||||
int err;
|
||||
|
||||
if (INTEL_GEN(vma->vm->i915) < 8)
|
||||
return ERR_PTR(-EINVAL);
|
||||
GEM_BUG_ON(INTEL_GEN(vma->vm->i915) < 8);
|
||||
|
||||
obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
|
||||
if (IS_ERR(obj))
|
||||
return ERR_CAST(obj);
|
||||
|
||||
cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
|
||||
if (IS_ERR(cmd)) {
|
||||
err = PTR_ERR(cmd);
|
||||
goto err;
|
||||
}
|
||||
cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
|
||||
if (IS_ERR(cmd))
|
||||
return PTR_ERR(cmd);
|
||||
|
||||
*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
|
||||
*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
|
||||
@ -912,26 +909,12 @@ static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
|
||||
*cmd++ = upper_32_bits(vma->node.start);
|
||||
*cmd = MI_BATCH_BUFFER_END;
|
||||
|
||||
__i915_gem_object_flush_map(obj, 0, 64);
|
||||
i915_gem_object_unpin_map(obj);
|
||||
__i915_gem_object_flush_map(rpcs, 0, 64);
|
||||
i915_gem_object_unpin_map(rpcs);
|
||||
|
||||
intel_gt_chipset_flush(vma->vm->gt);
|
||||
|
||||
vma = i915_vma_instance(obj, vma->vm, NULL);
|
||||
if (IS_ERR(vma)) {
|
||||
err = PTR_ERR(vma);
|
||||
goto err;
|
||||
}
|
||||
|
||||
err = i915_vma_pin(vma, 0, 0, PIN_USER);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
return vma;
|
||||
|
||||
err:
|
||||
i915_gem_object_put(obj);
|
||||
return ERR_PTR(err);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
@ -939,52 +922,68 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
|
||||
struct intel_context *ce,
|
||||
struct i915_request **rq_out)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct i915_request *rq;
|
||||
struct i915_gem_ww_ctx ww;
|
||||
struct i915_vma *batch;
|
||||
struct i915_vma *vma;
|
||||
struct drm_i915_gem_object *rpcs;
|
||||
int err;
|
||||
|
||||
GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
|
||||
|
||||
if (INTEL_GEN(i915) < 8)
|
||||
return -EINVAL;
|
||||
|
||||
vma = i915_vma_instance(obj, ce->vm, NULL);
|
||||
if (IS_ERR(vma))
|
||||
return PTR_ERR(vma);
|
||||
|
||||
i915_gem_object_lock(obj);
|
||||
err = i915_gem_object_set_to_gtt_domain(obj, false);
|
||||
i915_gem_object_unlock(obj);
|
||||
if (err)
|
||||
return err;
|
||||
rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
|
||||
if (IS_ERR(rpcs))
|
||||
return PTR_ERR(rpcs);
|
||||
|
||||
err = i915_vma_pin(vma, 0, 0, PIN_USER);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
batch = rpcs_query_batch(vma);
|
||||
batch = i915_vma_instance(rpcs, ce->vm, NULL);
|
||||
if (IS_ERR(batch)) {
|
||||
err = PTR_ERR(batch);
|
||||
goto err_vma;
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
i915_gem_ww_ctx_init(&ww, false);
|
||||
retry:
|
||||
err = i915_gem_object_lock(obj, &ww);
|
||||
if (!err)
|
||||
err = i915_gem_object_lock(rpcs, &ww);
|
||||
if (!err)
|
||||
err = i915_gem_object_set_to_gtt_domain(obj, false);
|
||||
if (!err)
|
||||
err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
|
||||
if (err)
|
||||
goto err_put;
|
||||
|
||||
err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
|
||||
if (err)
|
||||
goto err_vma;
|
||||
|
||||
err = rpcs_query_batch(rpcs, vma);
|
||||
if (err)
|
||||
goto err_batch;
|
||||
|
||||
rq = i915_request_create(ce);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto err_batch;
|
||||
}
|
||||
|
||||
i915_vma_lock(batch);
|
||||
err = i915_request_await_object(rq, batch->obj, false);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(batch, rq, 0);
|
||||
i915_vma_unlock(batch);
|
||||
if (err)
|
||||
goto skip_request;
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_request_await_object(rq, vma->obj, true);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
if (err)
|
||||
goto skip_request;
|
||||
|
||||
@ -1000,23 +999,24 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
|
||||
if (err)
|
||||
goto skip_request;
|
||||
|
||||
i915_vma_unpin_and_release(&batch, 0);
|
||||
i915_vma_unpin(vma);
|
||||
|
||||
*rq_out = i915_request_get(rq);
|
||||
|
||||
i915_request_add(rq);
|
||||
|
||||
return 0;
|
||||
|
||||
skip_request:
|
||||
i915_request_set_error_once(rq, err);
|
||||
if (err)
|
||||
i915_request_set_error_once(rq, err);
|
||||
i915_request_add(rq);
|
||||
err_batch:
|
||||
i915_vma_unpin_and_release(&batch, 0);
|
||||
i915_vma_unpin(batch);
|
||||
err_vma:
|
||||
i915_vma_unpin(vma);
|
||||
|
||||
err_put:
|
||||
if (err == -EDEADLK) {
|
||||
err = i915_gem_ww_ctx_backoff(&ww);
|
||||
if (!err)
|
||||
goto retry;
|
||||
}
|
||||
i915_gem_ww_ctx_fini(&ww);
|
||||
i915_gem_object_put(rpcs);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -1709,7 +1709,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
|
||||
|
||||
i915_request_add(rq);
|
||||
|
||||
i915_gem_object_lock(obj);
|
||||
i915_gem_object_lock(obj, NULL);
|
||||
err = i915_gem_object_set_to_cpu_domain(obj, false);
|
||||
i915_gem_object_unlock(obj);
|
||||
if (err)
|
||||
@ -1748,7 +1748,7 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
|
||||
if (!vm)
|
||||
return -ENODEV;
|
||||
|
||||
page = vm->scratch[0].base.page;
|
||||
page = __px_page(vm->scratch[0]);
|
||||
if (!page) {
|
||||
pr_err("No scratch page!\n");
|
||||
return -EINVAL;
|
||||
@ -1914,8 +1914,8 @@ static int mock_context_barrier(void *arg)
|
||||
return -ENOMEM;
|
||||
|
||||
counter = 0;
|
||||
err = context_barrier_task(ctx, 0,
|
||||
NULL, NULL, mock_barrier_task, &counter);
|
||||
err = context_barrier_task(ctx, 0, NULL, NULL, NULL,
|
||||
mock_barrier_task, &counter);
|
||||
if (err) {
|
||||
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
|
||||
goto out;
|
||||
@ -1927,11 +1927,8 @@ static int mock_context_barrier(void *arg)
|
||||
}
|
||||
|
||||
counter = 0;
|
||||
err = context_barrier_task(ctx, ALL_ENGINES,
|
||||
skip_unused_engines,
|
||||
NULL,
|
||||
mock_barrier_task,
|
||||
&counter);
|
||||
err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
|
||||
NULL, NULL, mock_barrier_task, &counter);
|
||||
if (err) {
|
||||
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
|
||||
goto out;
|
||||
@ -1951,8 +1948,8 @@ static int mock_context_barrier(void *arg)
|
||||
|
||||
counter = 0;
|
||||
context_barrier_inject_fault = BIT(RCS0);
|
||||
err = context_barrier_task(ctx, ALL_ENGINES,
|
||||
NULL, NULL, mock_barrier_task, &counter);
|
||||
err = context_barrier_task(ctx, ALL_ENGINES, NULL, NULL, NULL,
|
||||
mock_barrier_task, &counter);
|
||||
context_barrier_inject_fault = 0;
|
||||
if (err == -ENXIO)
|
||||
err = 0;
|
||||
@ -1966,11 +1963,8 @@ static int mock_context_barrier(void *arg)
|
||||
goto out;
|
||||
|
||||
counter = 0;
|
||||
err = context_barrier_task(ctx, ALL_ENGINES,
|
||||
skip_unused_engines,
|
||||
NULL,
|
||||
mock_barrier_task,
|
||||
&counter);
|
||||
err = context_barrier_task(ctx, ALL_ENGINES, skip_unused_engines,
|
||||
NULL, NULL, mock_barrier_task, &counter);
|
||||
if (err) {
|
||||
pr_err("Failed at line %d, err=%d\n", __LINE__, err);
|
||||
goto out;
|
||||
|
@ -32,46 +32,39 @@ static int __igt_gpu_reloc(struct i915_execbuffer *eb,
|
||||
if (IS_ERR(vma))
|
||||
return PTR_ERR(vma);
|
||||
|
||||
err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
|
||||
err = i915_gem_object_lock(obj, &eb->ww);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* 8-Byte aligned */
|
||||
if (!__reloc_entry_gpu(eb, vma,
|
||||
offsets[0] * sizeof(u32),
|
||||
0)) {
|
||||
err = -EIO;
|
||||
goto unpin_vma;
|
||||
}
|
||||
err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0);
|
||||
if (err <= 0)
|
||||
goto reloc_err;
|
||||
|
||||
/* !8-Byte aligned */
|
||||
if (!__reloc_entry_gpu(eb, vma,
|
||||
offsets[1] * sizeof(u32),
|
||||
1)) {
|
||||
err = -EIO;
|
||||
goto unpin_vma;
|
||||
}
|
||||
err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1);
|
||||
if (err <= 0)
|
||||
goto reloc_err;
|
||||
|
||||
/* Skip to the end of the cmd page */
|
||||
i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
|
||||
i = PAGE_SIZE / sizeof(u32) - 1;
|
||||
i -= eb->reloc_cache.rq_size;
|
||||
memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size,
|
||||
MI_NOOP, i);
|
||||
eb->reloc_cache.rq_size += i;
|
||||
|
||||
/* Force batch chaining */
|
||||
if (!__reloc_entry_gpu(eb, vma,
|
||||
offsets[2] * sizeof(u32),
|
||||
2)) {
|
||||
err = -EIO;
|
||||
goto unpin_vma;
|
||||
}
|
||||
/* Force next batch */
|
||||
err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2);
|
||||
if (err <= 0)
|
||||
goto reloc_err;
|
||||
|
||||
GEM_BUG_ON(!eb->reloc_cache.rq);
|
||||
rq = i915_request_get(eb->reloc_cache.rq);
|
||||
err = reloc_gpu_flush(&eb->reloc_cache);
|
||||
if (err)
|
||||
goto put_rq;
|
||||
reloc_gpu_flush(eb, &eb->reloc_cache);
|
||||
GEM_BUG_ON(eb->reloc_cache.rq);
|
||||
|
||||
err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
|
||||
@ -103,6 +96,11 @@ put_rq:
|
||||
unpin_vma:
|
||||
i915_vma_unpin(vma);
|
||||
return err;
|
||||
|
||||
reloc_err:
|
||||
if (!err)
|
||||
err = -EIO;
|
||||
goto unpin_vma;
|
||||
}
|
||||
|
||||
static int igt_gpu_reloc(void *arg)
|
||||
@ -124,6 +122,8 @@ static int igt_gpu_reloc(void *arg)
|
||||
goto err_scratch;
|
||||
}
|
||||
|
||||
intel_gt_pm_get(&eb.i915->gt);
|
||||
|
||||
for_each_uabi_engine(eb.engine, eb.i915) {
|
||||
reloc_cache_init(&eb.reloc_cache, eb.i915);
|
||||
memset(map, POISON_INUSE, 4096);
|
||||
@ -134,15 +134,29 @@ static int igt_gpu_reloc(void *arg)
|
||||
err = PTR_ERR(eb.context);
|
||||
goto err_pm;
|
||||
}
|
||||
eb.reloc_pool = NULL;
|
||||
eb.reloc_context = NULL;
|
||||
|
||||
err = intel_context_pin(eb.context);
|
||||
if (err)
|
||||
goto err_put;
|
||||
i915_gem_ww_ctx_init(&eb.ww, false);
|
||||
retry:
|
||||
err = intel_context_pin_ww(eb.context, &eb.ww);
|
||||
if (!err) {
|
||||
err = __igt_gpu_reloc(&eb, scratch);
|
||||
|
||||
err = __igt_gpu_reloc(&eb, scratch);
|
||||
intel_context_unpin(eb.context);
|
||||
}
|
||||
if (err == -EDEADLK) {
|
||||
err = i915_gem_ww_ctx_backoff(&eb.ww);
|
||||
if (!err)
|
||||
goto retry;
|
||||
}
|
||||
i915_gem_ww_ctx_fini(&eb.ww);
|
||||
|
||||
if (eb.reloc_pool)
|
||||
intel_gt_buffer_pool_put(eb.reloc_pool);
|
||||
if (eb.reloc_context)
|
||||
intel_context_put(eb.reloc_context);
|
||||
|
||||
intel_context_unpin(eb.context);
|
||||
err_put:
|
||||
intel_context_put(eb.context);
|
||||
err_pm:
|
||||
intel_engine_pm_put(eb.engine);
|
||||
@ -153,6 +167,7 @@ err_pm:
|
||||
if (igt_flush_test(eb.i915))
|
||||
err = -EIO;
|
||||
|
||||
intel_gt_pm_put(&eb.i915->gt);
|
||||
err_scratch:
|
||||
i915_gem_object_put(scratch);
|
||||
return err;
|
||||
|
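The __igt_gpu_reloc() hunks above also change __reloc_entry_gpu() from a bool into a tri-state result: negative errno (for example -EDEADLK from the ww locking path, which must propagate so the outer loop can back off), zero when nothing was emitted, positive on success. A hedged sketch of consuming that convention (check_reloc() is a hypothetical wrapper, and the exact parameter types are assumed from the calls in the hunk):

static int check_reloc(struct i915_execbuffer *eb, struct i915_vma *vma,
		       u64 offset, u64 value)
{
	int err = __reloc_entry_gpu(eb, vma, offset, value);

	if (err < 0)
		return err;	/* real error, e.g. -EDEADLK: let the caller back off */
	if (err == 0)
		return -EIO;	/* ran but emitted nothing: the selftest treats this as failure */
	return 0;		/* positive result: the relocation was emitted */
}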
@ -103,7 +103,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
|
||||
GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
|
||||
GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
|
||||
|
||||
i915_gem_object_lock(obj);
|
||||
i915_gem_object_lock(obj, NULL);
|
||||
err = i915_gem_object_set_to_gtt_domain(obj, true);
|
||||
i915_gem_object_unlock(obj);
|
||||
if (err) {
|
||||
@ -188,7 +188,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
|
||||
GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
|
||||
GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
|
||||
|
||||
i915_gem_object_lock(obj);
|
||||
i915_gem_object_lock(obj, NULL);
|
||||
err = i915_gem_object_set_to_gtt_domain(obj, true);
|
||||
i915_gem_object_unlock(obj);
|
||||
if (err) {
|
||||
@ -528,31 +528,42 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
|
||||
for_each_uabi_engine(engine, i915) {
|
||||
struct i915_request *rq;
|
||||
struct i915_vma *vma;
|
||||
struct i915_gem_ww_ctx ww;
|
||||
int err;
|
||||
|
||||
vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
|
||||
if (IS_ERR(vma))
|
||||
return PTR_ERR(vma);
|
||||
|
||||
err = i915_vma_pin(vma, 0, 0, PIN_USER);
|
||||
i915_gem_ww_ctx_init(&ww, false);
|
||||
retry:
|
||||
err = i915_gem_object_lock(obj, &ww);
|
||||
if (!err)
|
||||
err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
|
||||
if (err)
|
||||
return err;
|
||||
goto err;
|
||||
|
||||
rq = intel_engine_create_kernel_request(engine);
|
||||
if (IS_ERR(rq)) {
|
||||
i915_vma_unpin(vma);
|
||||
return PTR_ERR(rq);
|
||||
err = PTR_ERR(rq);
|
||||
goto err_unpin;
|
||||
}
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_request_await_object(rq, vma->obj, true);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq,
|
||||
EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
|
||||
i915_request_add(rq);
|
||||
err_unpin:
|
||||
i915_vma_unpin(vma);
|
||||
err:
|
||||
if (err == -EDEADLK) {
|
||||
err = i915_gem_ww_ctx_backoff(&ww);
|
||||
if (!err)
|
||||
goto retry;
|
||||
}
|
||||
i915_gem_ww_ctx_fini(&ww);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
@ -1123,6 +1134,7 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
|
||||
for_each_uabi_engine(engine, i915) {
|
||||
struct i915_request *rq;
|
||||
struct i915_vma *vma;
|
||||
struct i915_gem_ww_ctx ww;
|
||||
|
||||
vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL);
|
||||
if (IS_ERR(vma)) {
|
||||
@ -1130,9 +1142,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
err = i915_vma_pin(vma, 0, 0, PIN_USER);
|
||||
i915_gem_ww_ctx_init(&ww, false);
|
||||
retry:
|
||||
err = i915_gem_object_lock(obj, &ww);
|
||||
if (!err)
|
||||
err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
|
||||
if (err)
|
||||
goto out_unmap;
|
||||
goto out_ww;
|
||||
|
||||
rq = i915_request_create(engine->kernel_context);
|
||||
if (IS_ERR(rq)) {
|
||||
@ -1140,11 +1156,9 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
|
||||
goto out_unpin;
|
||||
}
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_request_await_object(rq, vma->obj, false);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, 0);
|
||||
i915_vma_unlock(vma);
|
||||
|
||||
err = engine->emit_bb_start(rq, vma->node.start, 0, 0);
|
||||
i915_request_get(rq);
|
||||
@ -1166,6 +1180,13 @@ static int __igt_mmap_gpu(struct drm_i915_private *i915,
|
||||
|
||||
out_unpin:
|
||||
i915_vma_unpin(vma);
|
||||
out_ww:
|
||||
if (err == -EDEADLK) {
|
||||
err = i915_gem_ww_ctx_backoff(&ww);
|
||||
if (!err)
|
||||
goto retry;
|
||||
}
|
||||
i915_gem_ww_ctx_fini(&ww);
|
||||
if (err)
|
||||
goto out_unmap;
|
||||
}
|
||||
|
@ -44,7 +44,7 @@ static int mock_phys_object(void *arg)
|
||||
}
|
||||
|
||||
/* Make the object dirty so that put_pages must do copy back the data */
|
||||
i915_gem_object_lock(obj);
|
||||
i915_gem_object_lock(obj, NULL);
|
||||
err = i915_gem_object_set_to_gtt_domain(obj, true);
|
||||
i915_gem_object_unlock(obj);
|
||||
if (err) {
|
||||
|
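A recurring mechanical change in the selftest hunks above is that i915_gem_object_lock() now takes a second argument: the i915_gem_ww_ctx the lock should join, or NULL for a one-off, single-object lock outside any wider ww acquire. A short sketch contrasting the two forms (the wrapper names are illustrative; the locking calls are the ones from the hunks):

/* One-off access: nothing else will be locked, so no ww context is needed. */
static int flush_to_gtt(struct drm_i915_gem_object *obj)
{
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);

	return err;
}

/* Multi-object path: the lock joins the caller's ww acquire context. */
static int lock_for_submission(struct drm_i915_gem_object *obj,
			       struct i915_gem_ww_ctx *ww)
{
	return i915_gem_object_lock(obj, ww);	/* may return -EDEADLK */
}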
@ -16,8 +16,10 @@ static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
|
||||
const unsigned int pde,
|
||||
const struct i915_page_table *pt)
|
||||
{
|
||||
dma_addr_t addr = pt ? px_dma(pt) : px_dma(ppgtt->base.vm.scratch[1]);
|
||||
|
||||
/* Caller needs to make sure the write completes if necessary */
|
||||
iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
|
||||
iowrite32(GEN6_PDE_ADDR_ENCODE(addr) | GEN6_PDE_VALID,
|
||||
ppgtt->pd_addr + pde);
|
||||
}
|
||||
|
||||
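gen6_write_pde() above now resolves the scratch fallback at write time: unused page-directory entries are plain NULL rather than pointers to the scratch table, and the scratch address is substituted only when the PDE is written to hardware. A hedged sketch of the new convention, built from the iterator and helper visible in these hunks (the wrapper function itself is illustrative):

/* Illustrative helper, not in the patch: rewrite every populated PDE. */
static void rewrite_pdes(struct gen6_ppgtt *ppgtt)
{
	struct i915_page_directory * const pd = ppgtt->base.pd;
	struct i915_page_table *pt;
	unsigned int pde;

	gen6_for_all_pdes(pt, pd, pde) {
		if (!pt)	/* empty slots are now NULL, not a scratch pointer */
			continue;

		gen6_write_pde(ppgtt, pde, pt);
	}
}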
@ -79,7 +81,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
|
||||
{
|
||||
struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
|
||||
const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
|
||||
const gen6_pte_t scratch_pte = vm->scratch[0].encode;
|
||||
const gen6_pte_t scratch_pte = vm->scratch[0]->encode;
|
||||
unsigned int pde = first_entry / GEN6_PTES;
|
||||
unsigned int pte = first_entry % GEN6_PTES;
|
||||
unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
|
||||
@ -90,8 +92,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
|
||||
const unsigned int count = min(num_entries, GEN6_PTES - pte);
|
||||
gen6_pte_t *vaddr;
|
||||
|
||||
GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
|
||||
|
||||
num_entries -= count;
|
||||
|
||||
GEM_BUG_ON(count > atomic_read(&pt->used));
|
||||
@ -127,7 +127,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
|
||||
struct sgt_dma iter = sgt_dma(vma);
|
||||
gen6_pte_t *vaddr;
|
||||
|
||||
GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
|
||||
GEM_BUG_ON(!pd->entry[act_pt]);
|
||||
|
||||
vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
|
||||
do {
|
||||
@ -177,39 +177,36 @@ static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
|
||||
mutex_unlock(&ppgtt->flush);
|
||||
}
|
||||
|
||||
static int gen6_alloc_va_range(struct i915_address_space *vm,
|
||||
u64 start, u64 length)
|
||||
static void gen6_alloc_va_range(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash,
|
||||
u64 start, u64 length)
|
||||
{
|
||||
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
|
||||
struct i915_page_directory * const pd = ppgtt->base.pd;
|
||||
struct i915_page_table *pt, *alloc = NULL;
|
||||
struct i915_page_table *pt;
|
||||
bool flush = false;
|
||||
u64 from = start;
|
||||
unsigned int pde;
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&pd->lock);
|
||||
gen6_for_each_pde(pt, pd, start, length, pde) {
|
||||
const unsigned int count = gen6_pte_count(start, length);
|
||||
|
||||
if (px_base(pt) == px_base(&vm->scratch[1])) {
|
||||
if (!pt) {
|
||||
spin_unlock(&pd->lock);
|
||||
|
||||
pt = fetch_and_zero(&alloc);
|
||||
if (!pt)
|
||||
pt = alloc_pt(vm);
|
||||
if (IS_ERR(pt)) {
|
||||
ret = PTR_ERR(pt);
|
||||
goto unwind_out;
|
||||
}
|
||||
pt = stash->pt[0];
|
||||
__i915_gem_object_pin_pages(pt->base);
|
||||
i915_gem_object_make_unshrinkable(pt->base);
|
||||
|
||||
fill32_px(pt, vm->scratch[0].encode);
|
||||
fill32_px(pt, vm->scratch[0]->encode);
|
||||
|
||||
spin_lock(&pd->lock);
|
||||
if (pd->entry[pde] == &vm->scratch[1]) {
|
||||
if (!pd->entry[pde]) {
|
||||
stash->pt[0] = pt->stash;
|
||||
atomic_set(&pt->used, 0);
|
||||
pd->entry[pde] = pt;
|
||||
} else {
|
||||
alloc = pt;
|
||||
pt = pd->entry[pde];
|
||||
}
|
||||
|
||||
@ -226,38 +223,32 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
|
||||
with_intel_runtime_pm(&vm->i915->runtime_pm, wakeref)
|
||||
gen6_flush_pd(ppgtt, from, start);
|
||||
}
|
||||
|
||||
goto out;
|
||||
|
||||
unwind_out:
|
||||
gen6_ppgtt_clear_range(vm, from, start - from);
|
||||
out:
|
||||
if (alloc)
|
||||
free_px(vm, alloc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
|
||||
{
|
||||
struct i915_address_space * const vm = &ppgtt->base.vm;
|
||||
struct i915_page_directory * const pd = ppgtt->base.pd;
|
||||
int ret;
|
||||
|
||||
ret = setup_scratch_page(vm, __GFP_HIGHMEM);
|
||||
ret = setup_scratch_page(vm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
vm->scratch[0].encode =
|
||||
vm->pte_encode(px_dma(&vm->scratch[0]),
|
||||
vm->scratch[0]->encode =
|
||||
vm->pte_encode(px_dma(vm->scratch[0]),
|
||||
I915_CACHE_NONE, PTE_READ_ONLY);
|
||||
|
||||
if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
|
||||
cleanup_scratch_page(vm);
|
||||
return -ENOMEM;
|
||||
vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
|
||||
if (IS_ERR(vm->scratch[1]))
|
||||
return PTR_ERR(vm->scratch[1]);
|
||||
|
||||
ret = pin_pt_dma(vm, vm->scratch[1]);
|
||||
if (ret) {
|
||||
i915_gem_object_put(vm->scratch[1]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
fill32_px(&vm->scratch[1], vm->scratch[0].encode);
|
||||
memset_p(pd->entry, &vm->scratch[1], I915_PDES);
|
||||
fill32_px(vm->scratch[1], vm->scratch[0]->encode);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -265,14 +256,12 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
|
||||
static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
|
||||
{
|
||||
struct i915_page_directory * const pd = ppgtt->base.pd;
|
||||
struct i915_page_dma * const scratch =
|
||||
px_base(&ppgtt->base.vm.scratch[1]);
|
||||
struct i915_page_table *pt;
|
||||
u32 pde;
|
||||
|
||||
gen6_for_all_pdes(pt, pd, pde)
|
||||
if (px_base(pt) != scratch)
|
||||
free_px(&ppgtt->base.vm, pt);
|
||||
if (pt)
|
||||
free_pt(&ppgtt->base.vm, pt);
|
||||
}
|
||||
|
||||
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
|
||||
@ -286,7 +275,8 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
|
||||
|
||||
mutex_destroy(&ppgtt->flush);
|
||||
mutex_destroy(&ppgtt->pin_mutex);
|
||||
kfree(ppgtt->base.pd);
|
||||
|
||||
free_pd(&ppgtt->base.vm, ppgtt->base.pd);
|
||||
}
|
||||
|
||||
static int pd_vma_set_pages(struct i915_vma *vma)
|
||||
@ -302,28 +292,26 @@ static void pd_vma_clear_pages(struct i915_vma *vma)
|
||||
vma->pages = NULL;
|
||||
}
|
||||
|
||||
static int pd_vma_bind(struct i915_address_space *vm,
|
||||
struct i915_vma *vma,
|
||||
enum i915_cache_level cache_level,
|
||||
u32 unused)
|
||||
static void pd_vma_bind(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash,
|
||||
struct i915_vma *vma,
|
||||
enum i915_cache_level cache_level,
|
||||
u32 unused)
|
||||
{
|
||||
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
|
||||
struct gen6_ppgtt *ppgtt = vma->private;
|
||||
u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
|
||||
|
||||
px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
|
||||
ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
|
||||
ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
|
||||
|
||||
gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
|
||||
{
|
||||
struct gen6_ppgtt *ppgtt = vma->private;
|
||||
struct i915_page_directory * const pd = ppgtt->base.pd;
|
||||
struct i915_page_dma * const scratch =
|
||||
px_base(&ppgtt->base.vm.scratch[1]);
|
||||
struct i915_page_table *pt;
|
||||
unsigned int pde;
|
||||
|
||||
@ -332,11 +320,11 @@ static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
|
||||
|
||||
/* Free all no longer used page tables */
|
||||
gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
|
||||
if (px_base(pt) == scratch || atomic_read(&pt->used))
|
||||
if (!pt || atomic_read(&pt->used))
|
||||
continue;
|
||||
|
||||
free_px(&ppgtt->base.vm, pt);
|
||||
pd->entry[pde] = scratch;
|
||||
free_pt(&ppgtt->base.vm, pt);
|
||||
pd->entry[pde] = NULL;
|
||||
}
|
||||
|
||||
ppgtt->scan_for_unused_pt = false;
|
||||
@ -380,7 +368,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
|
||||
return vma;
|
||||
}
|
||||
|
||||
int gen6_ppgtt_pin(struct i915_ppgtt *base)
|
||||
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
|
||||
{
|
||||
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
|
||||
int err;
|
||||
@ -406,7 +394,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base)
|
||||
*/
|
||||
err = 0;
|
||||
if (!atomic_read(&ppgtt->pin_count))
|
||||
err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
|
||||
err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
|
||||
if (!err)
|
||||
atomic_inc(&ppgtt->pin_count);
|
||||
mutex_unlock(&ppgtt->pin_mutex);
|
||||
@ -448,6 +436,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
|
||||
mutex_init(&ppgtt->pin_mutex);
|
||||
|
||||
ppgtt_init(&ppgtt->base, gt);
|
||||
ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
|
||||
ppgtt->base.vm.top = 1;
|
||||
|
||||
ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
|
||||
@ -456,9 +445,10 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
|
||||
ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
|
||||
ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
|
||||
|
||||
ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
|
||||
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
|
||||
|
||||
ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
|
||||
ppgtt->base.pd = __alloc_pd(I915_PDES);
|
||||
if (!ppgtt->base.pd) {
|
||||
err = -ENOMEM;
|
||||
goto err_free;
|
||||
@ -479,7 +469,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
|
||||
err_scratch:
|
||||
free_scratch(&ppgtt->base.vm);
|
||||
err_pd:
|
||||
kfree(ppgtt->base.pd);
|
||||
free_pd(&ppgtt->base.vm, ppgtt->base.pd);
|
||||
err_free:
|
||||
mutex_destroy(&ppgtt->pin_mutex);
|
||||
kfree(ppgtt);
|
||||
|
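gen6_alloc_va_range() above (and __gen8_ppgtt_alloc() later in this diff) no longer allocates page tables inside the VA walk; it consumes objects preallocated into an i915_vm_pt_stash, which is why the function can become void (nothing can fail mid-walk any more). The stash is a singly linked list threaded through pt->stash, popped only when the entry is actually installed. A hedged sketch of that pop-or-reuse step, using the fields and locking visible in the hunks (the wrapper function is illustrative):

/* Illustrative only: install a preallocated PT into an empty PDE slot. */
static struct i915_page_table *
install_from_stash(struct i915_vm_pt_stash *stash,
		   struct i915_page_directory *pd, unsigned int pde)
{
	struct i915_page_table *pt = stash->pt[0];	/* peek, do not pop yet */

	spin_lock(&pd->lock);
	if (!pd->entry[pde]) {
		stash->pt[0] = pt->stash;	/* pop: advance the stash head */
		atomic_set(&pt->used, 0);
		pd->entry[pde] = pt;
	} else {
		/* Lost the race: leave pt on the stash for the next slot. */
		pt = pd->entry[pde];
	}
	spin_unlock(&pd->lock);

	return pt;
}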
@ -8,12 +8,15 @@
|
||||
|
||||
#include "intel_gtt.h"
|
||||
|
||||
struct i915_gem_ww_ctx;
|
||||
|
||||
struct gen6_ppgtt {
|
||||
struct i915_ppgtt base;
|
||||
|
||||
struct mutex flush;
|
||||
struct i915_vma *vma;
|
||||
gen6_pte_t __iomem *pd_addr;
|
||||
u32 pp_dir;
|
||||
|
||||
atomic_t pin_count;
|
||||
struct mutex pin_mutex;
|
||||
@ -66,7 +69,7 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
|
||||
(pt = i915_pt_entry(pd, iter), true); \
|
||||
++iter)
|
||||
|
||||
int gen6_ppgtt_pin(struct i915_ppgtt *base);
|
||||
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww);
|
||||
void gen6_ppgtt_unpin(struct i915_ppgtt *base);
|
||||
void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
|
||||
void gen6_ppgtt_enable(struct intel_gt *gt);
|
||||
|
@ -181,7 +181,7 @@ static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
|
||||
} while (pde++, --count);
|
||||
}
|
||||
|
||||
free_px(vm, pd);
|
||||
free_px(vm, &pd->pt, lvl);
|
||||
}
|
||||
|
||||
static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
|
||||
@ -199,7 +199,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
|
||||
struct i915_page_directory * const pd,
|
||||
u64 start, const u64 end, int lvl)
|
||||
{
|
||||
const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
|
||||
const struct drm_i915_gem_object * const scratch = vm->scratch[lvl];
|
||||
unsigned int idx, len;
|
||||
|
||||
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
|
||||
@ -239,7 +239,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
|
||||
|
||||
vaddr = kmap_atomic_px(pt);
|
||||
memset64(vaddr + gen8_pd_index(start, 0),
|
||||
vm->scratch[0].encode,
|
||||
vm->scratch[0]->encode,
|
||||
count);
|
||||
kunmap_atomic(vaddr);
|
||||
|
||||
@ -248,7 +248,7 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
|
||||
}
|
||||
|
||||
if (release_pd_entry(pd, idx, pt, scratch))
|
||||
free_px(vm, pt);
|
||||
free_px(vm, pt, lvl);
|
||||
} while (idx++, --len);
|
||||
|
||||
return start;
|
||||
@ -269,14 +269,12 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm,
|
||||
start, start + length, vm->top);
|
||||
}
|
||||
|
||||
static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
|
||||
struct i915_page_directory * const pd,
|
||||
u64 * const start, const u64 end, int lvl)
|
||||
static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
|
||||
struct i915_vm_pt_stash *stash,
|
||||
struct i915_page_directory * const pd,
|
||||
u64 * const start, const u64 end, int lvl)
|
||||
{
|
||||
const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
|
||||
struct i915_page_table *alloc = NULL;
|
||||
unsigned int idx, len;
|
||||
int ret = 0;
|
||||
|
||||
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
|
||||
|
||||
@ -297,49 +295,31 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
|
||||
DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
|
||||
__func__, vm, lvl + 1, idx);
|
||||
|
||||
pt = fetch_and_zero(&alloc);
|
||||
if (lvl) {
|
||||
if (!pt) {
|
||||
pt = &alloc_pd(vm)->pt;
|
||||
if (IS_ERR(pt)) {
|
||||
ret = PTR_ERR(pt);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
pt = stash->pt[!!lvl];
|
||||
__i915_gem_object_pin_pages(pt->base);
|
||||
i915_gem_object_make_unshrinkable(pt->base);
|
||||
|
||||
fill_px(pt, vm->scratch[lvl].encode);
|
||||
} else {
|
||||
if (!pt) {
|
||||
pt = alloc_pt(vm);
|
||||
if (IS_ERR(pt)) {
|
||||
ret = PTR_ERR(pt);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (intel_vgpu_active(vm->i915) ||
|
||||
gen8_pt_count(*start, end) < I915_PDES)
|
||||
fill_px(pt, vm->scratch[lvl].encode);
|
||||
}
|
||||
if (lvl ||
|
||||
gen8_pt_count(*start, end) < I915_PDES ||
|
||||
intel_vgpu_active(vm->i915))
|
||||
fill_px(pt, vm->scratch[lvl]->encode);
|
||||
|
||||
spin_lock(&pd->lock);
|
||||
if (likely(!pd->entry[idx]))
|
||||
if (likely(!pd->entry[idx])) {
|
||||
stash->pt[!!lvl] = pt->stash;
|
||||
atomic_set(&pt->used, 0);
|
||||
set_pd_entry(pd, idx, pt);
|
||||
else
|
||||
alloc = pt, pt = pd->entry[idx];
|
||||
} else {
|
||||
pt = pd->entry[idx];
|
||||
}
|
||||
}
|
||||
|
||||
if (lvl) {
|
||||
atomic_inc(&pt->used);
|
||||
spin_unlock(&pd->lock);
|
||||
|
||||
ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
|
||||
start, end, lvl);
|
||||
if (unlikely(ret)) {
|
||||
if (release_pd_entry(pd, idx, pt, scratch))
|
||||
free_px(vm, pt);
|
||||
goto out;
|
||||
}
|
||||
__gen8_ppgtt_alloc(vm, stash,
|
||||
as_pd(pt), start, end, lvl);
|
||||
|
||||
spin_lock(&pd->lock);
|
||||
atomic_dec(&pt->used);
|
||||
@ -359,18 +339,12 @@ static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
|
||||
}
|
||||
} while (idx++, --len);
|
||||
spin_unlock(&pd->lock);
|
||||
out:
|
||||
if (alloc)
|
||||
free_px(vm, alloc);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int gen8_ppgtt_alloc(struct i915_address_space *vm,
|
||||
u64 start, u64 length)
|
||||
static void gen8_ppgtt_alloc(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash,
|
||||
u64 start, u64 length)
|
||||
{
|
||||
u64 from;
|
||||
int err;
|
||||
|
||||
GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
|
||||
GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
|
||||
GEM_BUG_ON(range_overflows(start, length, vm->total));
|
||||
@ -378,25 +352,9 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm,
|
||||
start >>= GEN8_PTE_SHIFT;
|
||||
length >>= GEN8_PTE_SHIFT;
|
||||
GEM_BUG_ON(length == 0);
|
||||
from = start;
|
||||
|
||||
err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
|
||||
&start, start + length, vm->top);
|
||||
if (unlikely(err && from != start))
|
||||
__gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
|
||||
from, start, vm->top);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static __always_inline void
|
||||
write_pte(gen8_pte_t *pte, const gen8_pte_t val)
|
||||
{
|
||||
/* Magic delays? Or can we refine these to flush all in one pass? */
|
||||
*pte = val;
|
||||
wmb(); /* cpu to cache */
|
||||
clflush(pte); /* cache to memory */
|
||||
wmb(); /* visible to all */
|
||||
__gen8_ppgtt_alloc(vm, stash, i915_vm_to_ppgtt(vm)->pd,
|
||||
&start, start + length, vm->top);
|
||||
}
|
||||
|
||||
static __always_inline u64
|
||||
@ -415,8 +373,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
|
||||
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
|
||||
do {
|
||||
GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
|
||||
write_pte(&vaddr[gen8_pd_index(idx, 0)],
|
||||
pte_encode | iter->dma);
|
||||
vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
|
||||
|
||||
iter->dma += I915_GTT_PAGE_SIZE;
|
||||
if (iter->dma >= iter->max) {
|
||||
@ -439,10 +396,12 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
|
||||
pd = pdp->entry[gen8_pd_index(idx, 2)];
|
||||
}
|
||||
|
||||
clflush_cache_range(vaddr, PAGE_SIZE);
|
||||
kunmap_atomic(vaddr);
|
||||
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
|
||||
}
|
||||
} while (1);
|
||||
clflush_cache_range(vaddr, PAGE_SIZE);
|
||||
kunmap_atomic(vaddr);
|
||||
|
||||
return idx;
|
||||
@ -498,7 +457,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
|
||||
|
||||
do {
|
||||
GEM_BUG_ON(iter->sg->length < page_size);
|
||||
write_pte(&vaddr[index++], encode | iter->dma);
|
||||
vaddr[index++] = encode | iter->dma;
|
||||
|
||||
start += page_size;
|
||||
iter->dma += page_size;
|
||||
@ -523,6 +482,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
|
||||
}
|
||||
} while (rem >= page_size && index < I915_PDES);
|
||||
|
||||
clflush_cache_range(vaddr, PAGE_SIZE);
|
||||
kunmap_atomic(vaddr);
|
||||
|
||||
/*
|
||||
@ -554,7 +514,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
|
||||
if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
|
||||
u16 i;
|
||||
|
||||
encode = vma->vm->scratch[0].encode;
|
||||
encode = vma->vm->scratch[0]->encode;
|
||||
vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
|
||||
|
||||
for (i = 1; i < index; i += 16)
|
||||
@ -608,27 +568,37 @@ static int gen8_init_scratch(struct i915_address_space *vm)
|
||||
GEM_BUG_ON(!clone->has_read_only);
|
||||
|
||||
vm->scratch_order = clone->scratch_order;
|
||||
memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
|
||||
px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
|
||||
for (i = 0; i <= vm->top; i++)
|
||||
vm->scratch[i] = i915_gem_object_get(clone->scratch[i]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = setup_scratch_page(vm, __GFP_HIGHMEM);
|
||||
ret = setup_scratch_page(vm);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
vm->scratch[0].encode =
|
||||
gen8_pte_encode(px_dma(&vm->scratch[0]),
|
||||
vm->scratch[0]->encode =
|
||||
gen8_pte_encode(px_dma(vm->scratch[0]),
|
||||
I915_CACHE_LLC, vm->has_read_only);
|
||||
|
||||
for (i = 1; i <= vm->top; i++) {
|
||||
if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
|
||||
struct drm_i915_gem_object *obj;
|
||||
|
||||
obj = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
|
||||
if (IS_ERR(obj))
|
||||
goto free_scratch;
|
||||
|
||||
fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
|
||||
vm->scratch[i].encode =
|
||||
gen8_pde_encode(px_dma(&vm->scratch[i]),
|
||||
I915_CACHE_LLC);
|
||||
ret = pin_pt_dma(vm, obj);
|
||||
if (ret) {
|
||||
i915_gem_object_put(obj);
|
||||
goto free_scratch;
|
||||
}
|
||||
|
||||
fill_px(obj, vm->scratch[i - 1]->encode);
|
||||
obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC);
|
||||
|
||||
vm->scratch[i] = obj;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -649,12 +619,20 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
|
||||
|
||||
for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
|
||||
struct i915_page_directory *pde;
|
||||
int err;
|
||||
|
||||
pde = alloc_pd(vm);
|
||||
if (IS_ERR(pde))
|
||||
return PTR_ERR(pde);
|
||||
|
||||
fill_px(pde, vm->scratch[1].encode);
|
||||
err = pin_pt_dma(vm, pde->pt.base);
|
||||
if (err) {
|
||||
i915_gem_object_put(pde->pt.base);
|
||||
free_pd(vm, pde);
|
||||
return err;
|
||||
}
|
||||
|
||||
fill_px(pde, vm->scratch[1]->encode);
|
||||
set_pd_entry(pd, idx, pde);
|
||||
atomic_inc(px_used(pde)); /* keep pinned */
|
||||
}
|
||||
@ -668,21 +646,32 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
|
||||
{
|
||||
const unsigned int count = gen8_pd_top_count(vm);
|
||||
struct i915_page_directory *pd;
|
||||
int err;
|
||||
|
||||
GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
|
||||
GEM_BUG_ON(count > I915_PDES);
|
||||
|
||||
pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
|
||||
pd = __alloc_pd(count);
|
||||
if (unlikely(!pd))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (unlikely(setup_page_dma(vm, px_base(pd)))) {
|
||||
kfree(pd);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
|
||||
if (IS_ERR(pd->pt.base)) {
|
||||
err = PTR_ERR(pd->pt.base);
|
||||
pd->pt.base = NULL;
|
||||
goto err_pd;
|
||||
}
|
||||
|
||||
fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
|
||||
err = pin_pt_dma(vm, pd->pt.base);
|
||||
if (err)
|
||||
goto err_pd;
|
||||
|
||||
fill_page_dma(px_base(pd), vm->scratch[vm->top]->encode, count);
|
||||
atomic_inc(px_used(pd)); /* mark as pinned */
|
||||
return pd;
|
||||
|
||||
err_pd:
|
||||
free_pd(vm, pd);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -703,6 +692,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
|
||||
|
||||
ppgtt_init(ppgtt, gt);
|
||||
ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
|
||||
ppgtt->vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen8_pte_t));
|
||||
|
||||
/*
|
||||
* From bdw, there is hw support for read-only pages in the PPGTT.
|
||||
@ -714,12 +704,7 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
|
||||
*/
|
||||
ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12);
|
||||
|
||||
/*
|
||||
* There are only few exceptions for gen >=6. chv and bxt.
|
||||
* And we are not sure about the latter so play safe for now.
|
||||
*/
|
||||
if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915))
|
||||
ppgtt->vm.pt_kmap_wc = true;
|
||||
ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
|
||||
|
||||
err = gen8_init_scratch(&ppgtt->vm);
|
||||
if (err)
|
||||
|
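The gen8 insert paths above also drop write_pte(), which issued a wmb() and clflush() for every entry, and instead write PTEs as plain stores followed by a single clflush_cache_range() over the mapped page before kunmap_atomic(). A minimal sketch of that batching (the helper and its parameters are illustrative; the flush call is the one used in the hunks):

/* Illustrative only: fill one page worth of PTEs, then flush the page once. */
static void fill_ptes_batched(gen8_pte_t *vaddr, gen8_pte_t pte_encode,
			      const dma_addr_t *addr, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		vaddr[i] = pte_encode | addr[i];	/* plain stores, no per-PTE wmb/clflush */

	clflush_cache_range(vaddr, PAGE_SIZE);		/* one ranged flush per PTE page */
}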
@ -28,6 +28,8 @@
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_trace.h"
|
||||
#include "intel_breadcrumbs.h"
|
||||
#include "intel_context.h"
|
||||
#include "intel_gt_pm.h"
|
||||
#include "intel_gt_requests.h"
|
||||
|
||||
@ -53,33 +55,65 @@ static void irq_disable(struct intel_engine_cs *engine)
|
||||
spin_unlock(&engine->gt->irq_lock);
|
||||
}
|
||||
|
||||
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
|
||||
{
|
||||
lockdep_assert_held(&b->irq_lock);
|
||||
|
||||
if (!b->irq_engine || b->irq_armed)
|
||||
return;
|
||||
|
||||
if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
|
||||
return;
|
||||
|
||||
/*
|
||||
* The breadcrumb irq will be disarmed on the interrupt after the
|
||||
* waiters are signaled. This gives us a single interrupt window in
|
||||
* which we can add a new waiter and avoid the cost of re-enabling
|
||||
* the irq.
|
||||
*/
|
||||
WRITE_ONCE(b->irq_armed, true);
|
||||
|
||||
/*
|
||||
* Since we are waiting on a request, the GPU should be busy
|
||||
* and should have its own rpm reference. This is tracked
|
||||
* by i915->gt.awake, we can forgo holding our own wakref
|
||||
* for the interrupt as before i915->gt.awake is released (when
|
||||
* the driver is idle) we disarm the breadcrumbs.
|
||||
*/
|
||||
|
||||
if (!b->irq_enabled++)
|
||||
irq_enable(b->irq_engine);
|
||||
}
|
||||
|
||||
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
|
||||
{
|
||||
struct intel_engine_cs *engine =
|
||||
container_of(b, struct intel_engine_cs, breadcrumbs);
|
||||
|
||||
lockdep_assert_held(&b->irq_lock);
|
||||
|
||||
if (!b->irq_engine || !b->irq_armed)
|
||||
return;
|
||||
|
||||
GEM_BUG_ON(!b->irq_enabled);
|
||||
if (!--b->irq_enabled)
|
||||
irq_disable(engine);
|
||||
irq_disable(b->irq_engine);
|
||||
|
||||
WRITE_ONCE(b->irq_armed, false);
|
||||
intel_gt_pm_put_async(engine->gt);
|
||||
intel_gt_pm_put_async(b->irq_engine->gt);
|
||||
}
|
||||
|
||||
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
|
||||
static void add_signaling_context(struct intel_breadcrumbs *b,
|
||||
struct intel_context *ce)
|
||||
{
|
||||
struct intel_breadcrumbs *b = &engine->breadcrumbs;
|
||||
unsigned long flags;
|
||||
intel_context_get(ce);
|
||||
list_add_tail(&ce->signal_link, &b->signalers);
|
||||
if (list_is_first(&ce->signal_link, &b->signalers))
|
||||
__intel_breadcrumbs_arm_irq(b);
|
||||
}
|
||||
|
||||
if (!READ_ONCE(b->irq_armed))
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&b->irq_lock, flags);
|
||||
if (b->irq_armed)
|
||||
__intel_breadcrumbs_disarm_irq(b);
|
||||
spin_unlock_irqrestore(&b->irq_lock, flags);
|
||||
static void remove_signaling_context(struct intel_breadcrumbs *b,
|
||||
struct intel_context *ce)
|
||||
{
|
||||
list_del(&ce->signal_link);
|
||||
intel_context_put(ce);
|
||||
}
|
||||
|
||||
static inline bool __request_completed(const struct i915_request *rq)
|
||||
@ -90,6 +124,9 @@ static inline bool __request_completed(const struct i915_request *rq)
|
||||
__maybe_unused static bool
|
||||
check_signal_order(struct intel_context *ce, struct i915_request *rq)
|
||||
{
|
||||
if (rq->context != ce)
|
||||
return false;
|
||||
|
||||
if (!list_is_last(&rq->signal_link, &ce->signals) &&
|
||||
i915_seqno_passed(rq->fence.seqno,
|
||||
list_next_entry(rq, signal_link)->fence.seqno))
|
||||
@ -133,25 +170,21 @@ __dma_fence_signal__notify(struct dma_fence *fence,
|
||||
|
||||
static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
|
||||
{
|
||||
struct intel_engine_cs *engine =
|
||||
container_of(b, struct intel_engine_cs, breadcrumbs);
|
||||
|
||||
if (unlikely(intel_engine_is_virtual(engine)))
|
||||
engine = intel_virtual_engine_get_sibling(engine, 0);
|
||||
|
||||
intel_engine_add_retire(engine, tl);
|
||||
if (b->irq_engine)
|
||||
intel_engine_add_retire(b->irq_engine, tl);
|
||||
}
|
||||
|
||||
static void __signal_request(struct i915_request *rq, struct list_head *signals)
|
||||
static bool __signal_request(struct i915_request *rq, struct list_head *signals)
|
||||
{
|
||||
GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
|
||||
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
|
||||
|
||||
if (!__dma_fence_signal(&rq->fence))
|
||||
return;
|
||||
if (!__dma_fence_signal(&rq->fence)) {
|
||||
i915_request_put(rq);
|
||||
return false;
|
||||
}
|
||||
|
||||
i915_request_get(rq);
|
||||
list_add_tail(&rq->signal_link, signals);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void signal_irq_work(struct irq_work *work)
|
||||
@ -164,7 +197,7 @@ static void signal_irq_work(struct irq_work *work)
|
||||
|
||||
spin_lock(&b->irq_lock);
|
||||
|
||||
if (b->irq_armed && list_empty(&b->signalers))
|
||||
if (list_empty(&b->signalers))
|
||||
__intel_breadcrumbs_disarm_irq(b);
|
||||
|
||||
list_splice_init(&b->signaled_requests, &signal);
|
||||
@ -197,8 +230,8 @@ static void signal_irq_work(struct irq_work *work)
|
||||
/* Advance the list to the first incomplete request */
|
||||
__list_del_many(&ce->signals, pos);
|
||||
if (&ce->signals == pos) { /* now empty */
|
||||
list_del_init(&ce->signal_link);
|
||||
add_retire(b, ce->timeline);
|
||||
remove_signaling_context(b, ce);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -220,116 +253,89 @@ static void signal_irq_work(struct irq_work *work)
|
||||
}
|
||||
}
|
||||
|
||||
static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
|
||||
struct intel_breadcrumbs *
|
||||
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
|
||||
{
|
||||
struct intel_engine_cs *engine =
|
||||
container_of(b, struct intel_engine_cs, breadcrumbs);
|
||||
struct intel_breadcrumbs *b;
|
||||
|
||||
lockdep_assert_held(&b->irq_lock);
|
||||
if (b->irq_armed)
|
||||
return true;
|
||||
|
||||
if (!intel_gt_pm_get_if_awake(engine->gt))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* The breadcrumb irq will be disarmed on the interrupt after the
|
||||
* waiters are signaled. This gives us a single interrupt window in
|
||||
* which we can add a new waiter and avoid the cost of re-enabling
|
||||
* the irq.
|
||||
*/
|
||||
WRITE_ONCE(b->irq_armed, true);
|
||||
|
||||
/*
|
||||
* Since we are waiting on a request, the GPU should be busy
|
||||
* and should have its own rpm reference. This is tracked
|
||||
* by i915->gt.awake, we can forgo holding our own wakref
|
||||
* for the interrupt as before i915->gt.awake is released (when
|
||||
* the driver is idle) we disarm the breadcrumbs.
|
||||
*/
|
||||
|
||||
if (!b->irq_enabled++)
|
||||
irq_enable(engine);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct intel_breadcrumbs *b = &engine->breadcrumbs;
|
||||
b = kzalloc(sizeof(*b), GFP_KERNEL);
|
||||
if (!b)
|
||||
return NULL;
|
||||
|
||||
spin_lock_init(&b->irq_lock);
|
||||
INIT_LIST_HEAD(&b->signalers);
|
||||
INIT_LIST_HEAD(&b->signaled_requests);
|
||||
|
||||
init_irq_work(&b->irq_work, signal_irq_work);
|
||||
|
||||
b->irq_engine = irq_engine;
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
|
||||
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
|
||||
{
|
||||
struct intel_breadcrumbs *b = &engine->breadcrumbs;
|
||||
unsigned long flags;
|
||||
|
||||
if (!b->irq_engine)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&b->irq_lock, flags);
|
||||
|
||||
if (b->irq_enabled)
|
||||
irq_enable(engine);
|
||||
irq_enable(b->irq_engine);
|
||||
else
|
||||
irq_disable(engine);
|
||||
irq_disable(b->irq_engine);
|
||||
|
||||
spin_unlock_irqrestore(&b->irq_lock, flags);
|
||||
}
|
||||
|
||||
void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
|
||||
struct intel_context *ce)
|
||||
void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
|
||||
{
|
||||
struct intel_breadcrumbs *b = &engine->breadcrumbs;
|
||||
unsigned long flags;
|
||||
|
||||
if (!READ_ONCE(b->irq_armed))
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&b->irq_lock, flags);
|
||||
if (!list_empty(&ce->signals)) {
|
||||
struct i915_request *rq, *next;
|
||||
|
||||
/* Queue for executing the signal callbacks in the irq_work */
|
||||
list_for_each_entry_safe(rq, next, &ce->signals, signal_link) {
|
||||
GEM_BUG_ON(rq->engine != engine);
|
||||
GEM_BUG_ON(!__request_completed(rq));
|
||||
|
||||
__signal_request(rq, &b->signaled_requests);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&ce->signals);
|
||||
list_del_init(&ce->signal_link);
|
||||
|
||||
irq_work_queue(&b->irq_work);
|
||||
}
|
||||
__intel_breadcrumbs_disarm_irq(b);
|
||||
spin_unlock_irqrestore(&b->irq_lock, flags);
|
||||
|
||||
if (!list_empty(&b->signalers))
|
||||
irq_work_queue(&b->irq_work);
|
||||
}
|
||||
|
||||
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
|
||||
void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
|
||||
{
|
||||
kfree(b);
|
||||
}
|
||||
|
||||
bool i915_request_enable_breadcrumb(struct i915_request *rq)
|
||||
static void insert_breadcrumb(struct i915_request *rq,
|
||||
struct intel_breadcrumbs *b)
|
||||
{
|
||||
lockdep_assert_held(&rq->lock);
|
||||
struct intel_context *ce = rq->context;
|
||||
struct list_head *pos;
|
||||
|
||||
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
|
||||
return true;
|
||||
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
|
||||
return;
|
||||
|
||||
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
|
||||
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
|
||||
struct intel_context *ce = rq->context;
|
||||
struct list_head *pos;
|
||||
i915_request_get(rq);
|
||||
|
||||
spin_lock(&b->irq_lock);
|
||||
|
||||
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
|
||||
goto unlock;
|
||||
|
||||
if (!__intel_breadcrumbs_arm_irq(b))
|
||||
goto unlock;
|
||||
/*
|
||||
* If the request is already completed, we can transfer it
|
||||
* straight onto a signaled list, and queue the irq worker for
|
||||
* its signal completion.
|
||||
*/
|
||||
if (__request_completed(rq)) {
|
||||
if (__signal_request(rq, &b->signaled_requests))
|
||||
irq_work_queue(&b->irq_work);
|
||||
return;
|
||||
}
|
||||
|
||||
if (list_empty(&ce->signals)) {
|
||||
add_signaling_context(b, ce);
|
||||
pos = &ce->signals;
|
||||
} else {
|
||||
/*
|
||||
* We keep the seqno in retirement order, so we can break
|
||||
* inside intel_engine_signal_breadcrumbs as soon as we've
|
||||
@ -351,24 +357,75 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
|
||||
if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
|
||||
break;
|
||||
}
|
||||
list_add(&rq->signal_link, pos);
|
||||
if (pos == &ce->signals) /* catch transitions from empty list */
|
||||
list_move_tail(&ce->signal_link, &b->signalers);
|
||||
GEM_BUG_ON(!check_signal_order(ce, rq));
|
||||
}
|
||||
list_add(&rq->signal_link, pos);
|
||||
GEM_BUG_ON(!check_signal_order(ce, rq));
|
||||
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
|
||||
|
||||
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
|
||||
unlock:
|
||||
/* Check after attaching to irq, interrupt may have already fired. */
|
||||
if (__request_completed(rq))
|
||||
irq_work_queue(&b->irq_work);
|
||||
}
|
||||
|
||||
bool i915_request_enable_breadcrumb(struct i915_request *rq)
|
||||
{
|
||||
struct intel_breadcrumbs *b;
|
||||
|
||||
/* Serialises with i915_request_retire() using rq->lock */
|
||||
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Peek at i915_request_submit()/i915_request_unsubmit() status.
|
||||
*
|
||||
* If the request is not yet active (and not signaled), we will
|
||||
* attach the breadcrumb later.
|
||||
*/
|
||||
if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* rq->engine is locked by rq->engine->active.lock. That however
|
||||
* is not known until after rq->engine has been dereferenced and
|
||||
* the lock acquired. Hence we acquire the lock and then validate
|
||||
* that rq->engine still matches the lock we hold for it.
|
||||
*
|
||||
* Here, we are using the breadcrumb lock as a proxy for the
|
||||
* rq->engine->active.lock, and we know that since the breadcrumb
|
||||
* will be serialised within i915_request_submit/i915_request_unsubmit,
|
||||
* the engine cannot change while active as long as we hold the
|
||||
* breadcrumb lock on that engine.
|
||||
*
|
||||
* From the dma_fence_enable_signaling() path, we are outside of the
|
||||
* request submit/unsubmit path, and so we must be more careful to
|
||||
* acquire the right lock.
|
||||
*/
|
||||
b = READ_ONCE(rq->engine)->breadcrumbs;
|
||||
spin_lock(&b->irq_lock);
|
||||
while (unlikely(b != READ_ONCE(rq->engine)->breadcrumbs)) {
|
||||
spin_unlock(&b->irq_lock);
|
||||
b = READ_ONCE(rq->engine)->breadcrumbs;
|
||||
spin_lock(&b->irq_lock);
|
||||
}
|
||||
|
||||
return !__request_completed(rq);
|
||||
/*
|
||||
* Now that we are finally serialised with request submit/unsubmit,
|
||||
* [with b->irq_lock] and with i915_request_retire() [via checking
|
||||
* SIGNALED with rq->lock] confirm the request is indeed active. If
|
||||
* it is no longer active, the breadcrumb will be attached upon
|
||||
* i915_request_submit().
|
||||
*/
|
||||
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
|
||||
insert_breadcrumb(rq, b);
|
||||
|
||||
spin_unlock(&b->irq_lock);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void i915_request_cancel_breadcrumb(struct i915_request *rq)
|
||||
{
|
||||
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
|
||||
|
||||
lockdep_assert_held(&rq->lock);
|
||||
struct intel_breadcrumbs *b = rq->engine->breadcrumbs;
|
||||
|
||||
/*
|
||||
* We must wait for b->irq_lock so that we know the interrupt handler
|
||||
@ -382,23 +439,19 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
|
||||
|
||||
list_del(&rq->signal_link);
|
||||
if (list_empty(&ce->signals))
|
||||
list_del_init(&ce->signal_link);
|
||||
remove_signaling_context(b, ce);
|
||||
|
||||
clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
|
||||
i915_request_put(rq);
|
||||
}
|
||||
spin_unlock(&b->irq_lock);
|
||||
}
|
||||
|
||||
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
|
||||
struct drm_printer *p)
|
||||
static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
|
||||
{
|
||||
struct intel_breadcrumbs *b = &engine->breadcrumbs;
|
||||
struct intel_context *ce;
|
||||
struct i915_request *rq;
|
||||
|
||||
if (list_empty(&b->signalers))
|
||||
return;
|
||||
|
||||
drm_printf(p, "Signals:\n");
|
||||
|
||||
spin_lock_irq(&b->irq_lock);
|
||||
@ -414,3 +467,17 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
|
||||
}
|
||||
spin_unlock_irq(&b->irq_lock);
|
||||
}
|
||||
|
||||
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
|
||||
struct drm_printer *p)
|
||||
{
|
||||
struct intel_breadcrumbs *b;
|
||||
|
||||
b = engine->breadcrumbs;
|
||||
if (!b)
|
||||
return;
|
||||
|
||||
drm_printf(p, "IRQ: %s\n", enableddisabled(b->irq_armed));
|
||||
if (!list_empty(&b->signalers))
|
||||
print_signals(b, p);
|
||||
}
|
||||
|
36	drivers/gpu/drm/i915/gt/intel_breadcrumbs.h	Normal file
@ -0,0 +1,36 @@
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */

#ifndef __INTEL_BREADCRUMBS__
#define __INTEL_BREADCRUMBS__

#include <linux/irq_work.h>

#include "intel_engine_types.h"

struct drm_printer;
struct i915_request;
struct intel_breadcrumbs;

struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine);
void intel_breadcrumbs_free(struct intel_breadcrumbs *b);

void intel_breadcrumbs_reset(struct intel_breadcrumbs *b);
void intel_breadcrumbs_park(struct intel_breadcrumbs *b);

static inline void
intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
{
	irq_work_queue(&engine->breadcrumbs->irq_work);
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p);

bool i915_request_enable_breadcrumb(struct i915_request *request);
void i915_request_cancel_breadcrumb(struct i915_request *request);

#endif /* __INTEL_BREADCRUMBS__ */
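With this header, intel_breadcrumbs becomes a separately allocated object rather than a field embedded in intel_engine_cs, and intel_breadcrumbs_create() takes the engine whose interrupt drives signaling, or NULL for breadcrumbs with no physical interrupt behind them (the virtual-engine case handled in intel_breadcrumbs.c above). A hedged sketch of the expected setup/teardown pairing (the surrounding functions are illustrative; only the create/free calls and the engine->breadcrumbs pointer come from these hunks):

static int setup_breadcrumbs(struct intel_engine_cs *engine)
{
	engine->breadcrumbs = intel_breadcrumbs_create(engine);
	if (!engine->breadcrumbs)
		return -ENOMEM;	/* kzalloc failure inside intel_breadcrumbs_create() */

	return 0;
}

static void cleanup_breadcrumbs(struct intel_engine_cs *engine)
{
	intel_breadcrumbs_free(engine->breadcrumbs);
	engine->breadcrumbs = NULL;
}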
47	drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h	Normal file
@ -0,0 +1,47 @@
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2019 Intel Corporation
 */

#ifndef __INTEL_BREADCRUMBS_TYPES__
#define __INTEL_BREADCRUMBS_TYPES__

#include <linux/irq_work.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

/*
 * Rather than have every client wait upon all user interrupts,
 * with the herd waking after every interrupt and each doing the
 * heavyweight seqno dance, we delegate the task (of being the
 * bottom-half of the user interrupt) to the first client. After
 * every interrupt, we wake up one client, who does the heavyweight
 * coherent seqno read and either goes back to sleep (if incomplete),
 * or wakes up all the completed clients in parallel, before then
 * transferring the bottom-half status to the next client in the queue.
 *
 * Compared to walking the entire list of waiters in a single dedicated
 * bottom-half, we reduce the latency of the first waiter by avoiding
 * a context switch, but incur additional coherent seqno reads when
 * following the chain of request breadcrumbs. Since it is most likely
 * that we have a single client waiting on each seqno, then reducing
 * the overhead of waking that client is much preferred.
 */
struct intel_breadcrumbs {
	spinlock_t irq_lock; /* protects the lists used in hardirq context */

	/* Not all breadcrumbs are attached to physical HW */
	struct intel_engine_cs *irq_engine;

	struct list_head signalers;
	struct list_head signaled_requests;

	struct irq_work irq_work; /* for use from inside irq_lock */

	unsigned int irq_enabled;

	bool irq_armed;
};

#endif /* __INTEL_BREADCRUMBS_TYPES__ */
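insert_breadcrumb() in intel_breadcrumbs.c above keeps each context's ce->signals list ordered by seqno so the signaling irq_work can stop walking at the first incomplete request. A compact sketch of that ordered insertion, walking backwards because new requests usually belong at the tail (the field and helper names are the ones from the hunks; the wrapper is illustrative):

/* Illustrative only: link rq into ce->signals keeping seqno order. */
static void add_signal_ordered(struct intel_context *ce,
			       struct i915_request *rq)
{
	struct list_head *pos;

	list_for_each_prev(pos, &ce->signals) {
		struct i915_request *it =
			list_entry(pos, typeof(*it), signal_link);

		/* Stop at the first request our seqno has already passed. */
		if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
			break;
	}
	list_add(&rq->signal_link, pos);	/* lands at the head if rq is the oldest */
}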
@ -93,85 +93,12 @@ static void intel_context_active_release(struct intel_context *ce)
|
||||
i915_active_release(&ce->active);
|
||||
}
|
||||
|
||||
int __intel_context_do_pin(struct intel_context *ce)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
|
||||
err = intel_context_alloc_state(ce);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
err = i915_active_acquire(&ce->active);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (mutex_lock_interruptible(&ce->pin_mutex)) {
|
||||
err = -EINTR;
|
||||
goto out_release;
|
||||
}
|
||||
|
||||
if (unlikely(intel_context_is_closed(ce))) {
|
||||
err = -ENOENT;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
|
||||
err = intel_context_active_acquire(ce);
|
||||
if (unlikely(err))
|
||||
goto out_unlock;
|
||||
|
||||
err = ce->ops->pin(ce);
|
||||
if (unlikely(err))
|
||||
goto err_active;
|
||||
|
||||
CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
|
||||
i915_ggtt_offset(ce->ring->vma),
|
||||
ce->ring->head, ce->ring->tail);
|
||||
|
||||
smp_mb__before_atomic(); /* flush pin before it is visible */
|
||||
atomic_inc(&ce->pin_count);
|
||||
}
|
||||
|
||||
GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
|
||||
GEM_BUG_ON(i915_active_is_idle(&ce->active));
|
||||
goto out_unlock;
|
||||
|
||||
err_active:
|
||||
intel_context_active_release(ce);
|
||||
out_unlock:
|
||||
mutex_unlock(&ce->pin_mutex);
|
||||
out_release:
|
||||
i915_active_release(&ce->active);
|
||||
return err;
|
||||
}
|
||||
|
||||
void intel_context_unpin(struct intel_context *ce)
|
||||
{
|
||||
if (!atomic_dec_and_test(&ce->pin_count))
|
||||
return;
|
||||
|
||||
CE_TRACE(ce, "unpin\n");
|
||||
ce->ops->unpin(ce);
|
||||
|
||||
/*
|
||||
* Once released, we may asynchronously drop the active reference.
|
||||
* As that may be the only reference keeping the context alive,
|
||||
* take an extra now so that it is not freed before we finish
|
||||
* dereferencing it.
|
||||
*/
|
||||
intel_context_get(ce);
|
||||
intel_context_active_release(ce);
|
||||
intel_context_put(ce);
|
||||
}
|
||||
|
||||
static int __context_pin_state(struct i915_vma *vma)
|
||||
static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
|
||||
{
|
||||
unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
|
||||
int err;
|
||||
|
||||
err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
|
||||
err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -200,11 +127,12 @@ static void __context_unpin_state(struct i915_vma *vma)
|
||||
__i915_vma_unpin(vma);
|
||||
}
|
||||
|
||||
static int __ring_active(struct intel_ring *ring)
|
||||
static int __ring_active(struct intel_ring *ring,
|
||||
struct i915_gem_ww_ctx *ww)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = intel_ring_pin(ring);
|
||||
err = intel_ring_pin(ring, ww);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -225,6 +153,173 @@ static void __ring_retire(struct intel_ring *ring)
|
||||
intel_ring_unpin(ring);
|
||||
}
|
||||
|
||||
static int intel_context_pre_pin(struct intel_context *ce,
|
||||
struct i915_gem_ww_ctx *ww)
|
||||
{
|
||||
int err;
|
||||
|
||||
CE_TRACE(ce, "active\n");
|
||||
|
||||
err = __ring_active(ce->ring, ww);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = intel_timeline_pin(ce->timeline, ww);
|
||||
if (err)
|
||||
goto err_ring;
|
||||
|
||||
if (!ce->state)
|
||||
return 0;
|
||||
|
||||
err = __context_pin_state(ce->state, ww);
|
||||
if (err)
|
||||
goto err_timeline;
|
||||
|
||||
|
||||
return 0;
|
||||
|
||||
err_timeline:
|
||||
intel_timeline_unpin(ce->timeline);
|
||||
err_ring:
|
||||
__ring_retire(ce->ring);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void intel_context_post_unpin(struct intel_context *ce)
|
||||
{
|
||||
if (ce->state)
|
||||
__context_unpin_state(ce->state);
|
||||
|
||||
intel_timeline_unpin(ce->timeline);
|
||||
__ring_retire(ce->ring);
|
||||
}
|
||||
|
||||
int __intel_context_do_pin_ww(struct intel_context *ce,
|
||||
struct i915_gem_ww_ctx *ww)
|
||||
{
|
||||
bool handoff = false;
|
||||
void *vaddr;
|
||||
int err = 0;
|
||||
|
||||
if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
|
||||
err = intel_context_alloc_state(ce);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* We always pin the context/ring/timeline here, to ensure a pin
|
||||
* refcount for __intel_context_active(), which prevent a lock
|
||||
* inversion of ce->pin_mutex vs dma_resv_lock().
|
||||
*/
|
||||
|
||||
err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
|
||||
if (!err && ce->ring->vma->obj)
|
||||
err = i915_gem_object_lock(ce->ring->vma->obj, ww);
|
||||
if (!err && ce->state)
|
||||
err = i915_gem_object_lock(ce->state->obj, ww);
|
||||
if (!err)
|
||||
err = intel_context_pre_pin(ce, ww);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = i915_active_acquire(&ce->active);
|
||||
if (err)
|
||||
goto err_ctx_unpin;
|
||||
|
||||
err = ce->ops->pre_pin(ce, ww, &vaddr);
|
||||
if (err)
|
||||
goto err_release;
|
||||
|
||||
err = mutex_lock_interruptible(&ce->pin_mutex);
|
||||
if (err)
|
||||
goto err_post_unpin;
|
||||
|
||||
if (unlikely(intel_context_is_closed(ce))) {
|
||||
err = -ENOENT;
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
|
||||
err = intel_context_active_acquire(ce);
|
||||
if (unlikely(err))
|
||||
goto err_unlock;
|
||||
|
||||
err = ce->ops->pin(ce, vaddr);
|
||||
if (err) {
|
||||
intel_context_active_release(ce);
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
|
||||
i915_ggtt_offset(ce->ring->vma),
|
||||
ce->ring->head, ce->ring->tail);
|
||||
|
||||
handoff = true;
|
||||
smp_mb__before_atomic(); /* flush pin before it is visible */
|
||||
atomic_inc(&ce->pin_count);
|
||||
}
|
||||
|
||||
GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
|
||||
|
||||
err_unlock:
|
||||
mutex_unlock(&ce->pin_mutex);
|
||||
err_post_unpin:
|
||||
if (!handoff)
|
||||
ce->ops->post_unpin(ce);
|
||||
err_release:
|
||||
i915_active_release(&ce->active);
|
||||
err_ctx_unpin:
|
||||
intel_context_post_unpin(ce);
|
||||
|
||||
/*
|
||||
* Unlock the hwsp_ggtt object since it's shared.
|
||||
* In principle we can unlock all the global state locked above
|
||||
* since it's pinned and doesn't need fencing, and will
|
||||
* thus remain resident until it is explicitly unpinned.
|
||||
*/
|
||||
i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int __intel_context_do_pin(struct intel_context *ce)
|
||||
{
|
||||
struct i915_gem_ww_ctx ww;
|
||||
int err;
|
||||
|
||||
i915_gem_ww_ctx_init(&ww, true);
|
||||
retry:
|
||||
err = __intel_context_do_pin_ww(ce, &ww);
|
||||
if (err == -EDEADLK) {
|
||||
err = i915_gem_ww_ctx_backoff(&ww);
|
||||
if (!err)
|
||||
goto retry;
|
||||
}
|
||||
i915_gem_ww_ctx_fini(&ww);
|
||||
return err;
|
||||
}
|
||||
|
||||
void intel_context_unpin(struct intel_context *ce)
|
||||
{
|
||||
if (!atomic_dec_and_test(&ce->pin_count))
|
||||
return;
|
||||
|
||||
CE_TRACE(ce, "unpin\n");
|
||||
ce->ops->unpin(ce);
|
||||
ce->ops->post_unpin(ce);
|
||||
|
||||
/*
|
||||
* Once released, we may asynchronously drop the active reference.
|
||||
* As that may be the only reference keeping the context alive,
|
||||
* take an extra now so that it is not freed before we finish
|
||||
* dereferencing it.
|
||||
*/
|
||||
intel_context_get(ce);
|
||||
intel_context_active_release(ce);
|
||||
intel_context_put(ce);
|
||||
}
|
||||
|
||||
__i915_active_call
|
||||
static void __intel_context_retire(struct i915_active *active)
|
||||
{
|
||||
@ -235,48 +330,29 @@ static void __intel_context_retire(struct i915_active *active)
|
||||
intel_context_get_avg_runtime_ns(ce));
|
||||
|
||||
set_bit(CONTEXT_VALID_BIT, &ce->flags);
|
||||
if (ce->state)
|
||||
__context_unpin_state(ce->state);
|
||||
|
||||
intel_timeline_unpin(ce->timeline);
|
||||
__ring_retire(ce->ring);
|
||||
|
||||
intel_context_post_unpin(ce);
|
||||
intel_context_put(ce);
|
||||
}
|
||||
|
||||
static int __intel_context_active(struct i915_active *active)
|
||||
{
|
||||
struct intel_context *ce = container_of(active, typeof(*ce), active);
|
||||
int err;
|
||||
|
||||
CE_TRACE(ce, "active\n");
|
||||
|
||||
intel_context_get(ce);
|
||||
|
||||
err = __ring_active(ce->ring);
|
||||
if (err)
|
||||
goto err_put;
|
||||
/* everything should already be activated by intel_context_pre_pin() */
|
||||
GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
|
||||
__intel_ring_pin(ce->ring);
|
||||
|
||||
err = intel_timeline_pin(ce->timeline);
|
||||
if (err)
|
||||
goto err_ring;
|
||||
__intel_timeline_pin(ce->timeline);
|
||||
|
||||
if (!ce->state)
|
||||
return 0;
|
||||
|
||||
err = __context_pin_state(ce->state);
|
||||
if (err)
|
||||
goto err_timeline;
|
||||
if (ce->state) {
|
||||
GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
|
||||
__i915_vma_pin(ce->state);
|
||||
i915_vma_make_unshrinkable(ce->state);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_timeline:
|
||||
intel_timeline_unpin(ce->timeline);
|
||||
err_ring:
|
||||
__ring_retire(ce->ring);
|
||||
err_put:
|
||||
intel_context_put(ce);
|
||||
return err;
|
||||
}
|
||||
|
||||
void
|
||||
@ -382,15 +458,37 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
|
||||
|
||||
struct i915_request *intel_context_create_request(struct intel_context *ce)
|
||||
{
|
||||
struct i915_gem_ww_ctx ww;
|
||||
struct i915_request *rq;
|
||||
int err;
|
||||
|
||||
err = intel_context_pin(ce);
|
||||
if (unlikely(err))
|
||||
return ERR_PTR(err);
|
||||
i915_gem_ww_ctx_init(&ww, true);
|
||||
retry:
|
||||
err = intel_context_pin_ww(ce, &ww);
|
||||
if (!err) {
|
||||
rq = i915_request_create(ce);
|
||||
intel_context_unpin(ce);
|
||||
} else if (err == -EDEADLK) {
|
||||
err = i915_gem_ww_ctx_backoff(&ww);
|
||||
if (!err)
|
||||
goto retry;
|
||||
} else {
|
||||
rq = ERR_PTR(err);
|
||||
}
|
||||
|
||||
rq = i915_request_create(ce);
|
||||
intel_context_unpin(ce);
|
||||
i915_gem_ww_ctx_fini(&ww);
|
||||
|
||||
if (IS_ERR(rq))
|
||||
return rq;
|
||||
|
||||
/*
|
||||
* timeline->mutex should be the inner lock, but is used as outer lock.
|
||||
* Hack around this to shut up lockdep in selftests..
|
||||
*/
|
||||
lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
|
||||
mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
|
||||
mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
|
||||
rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);
|
||||
|
||||
return rq;
|
||||
}
|
||||
|
@ -25,6 +25,8 @@
|
||||
##__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
struct i915_gem_ww_ctx;
|
||||
|
||||
void intel_context_init(struct intel_context *ce,
|
||||
struct intel_engine_cs *engine);
|
||||
void intel_context_fini(struct intel_context *ce);
|
||||
@ -81,6 +83,8 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
|
||||
}
|
||||
|
||||
int __intel_context_do_pin(struct intel_context *ce);
|
||||
int __intel_context_do_pin_ww(struct intel_context *ce,
|
||||
struct i915_gem_ww_ctx *ww);
|
||||
|
||||
static inline bool intel_context_pin_if_active(struct intel_context *ce)
|
||||
{
|
||||
@ -95,6 +99,15 @@ static inline int intel_context_pin(struct intel_context *ce)
|
||||
return __intel_context_do_pin(ce);
|
||||
}
|
||||
|
||||
static inline int intel_context_pin_ww(struct intel_context *ce,
|
||||
struct i915_gem_ww_ctx *ww)
|
||||
{
|
||||
if (likely(intel_context_pin_if_active(ce)))
|
||||
return 0;
|
||||
|
||||
return __intel_context_do_pin_ww(ce, ww);
|
||||
}
|
||||
|
||||
static inline void __intel_context_pin(struct intel_context *ce)
|
||||
{
|
||||
GEM_BUG_ON(!intel_context_is_pinned(ce));
|
||||
|
@ -23,6 +23,7 @@
|
||||
DECLARE_EWMA(runtime, 3, 8);
|
||||
|
||||
struct i915_gem_context;
|
||||
struct i915_gem_ww_ctx;
|
||||
struct i915_vma;
|
||||
struct intel_context;
|
||||
struct intel_ring;
|
||||
@ -30,8 +31,10 @@ struct intel_ring;
|
||||
struct intel_context_ops {
|
||||
int (*alloc)(struct intel_context *ce);
|
||||
|
||||
int (*pin)(struct intel_context *ce);
|
||||
int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
|
||||
int (*pin)(struct intel_context *ce, void *vaddr);
|
||||
void (*unpin)(struct intel_context *ce);
|
||||
void (*post_unpin)(struct intel_context *ce);
|
||||
|
||||
void (*enter)(struct intel_context *ce);
|
||||
void (*exit)(struct intel_context *ce);
|
||||
|
@ -223,26 +223,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
|
||||
|
||||
void intel_engine_init_execlists(struct intel_engine_cs *engine);
|
||||
|
||||
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
|
||||
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
|
||||
|
||||
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
|
||||
|
||||
static inline void
|
||||
intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
|
||||
{
|
||||
irq_work_queue(&engine->breadcrumbs.irq_work);
|
||||
}
|
||||
|
||||
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
|
||||
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
|
||||
|
||||
void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
|
||||
struct intel_context *ce);
|
||||
|
||||
void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
|
||||
struct drm_printer *p);
|
||||
|
||||
static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
|
||||
{
|
||||
memset(batch, 0, 6 * sizeof(u32));
|
||||
|
@ -28,6 +28,7 @@
|
||||
|
||||
#include "i915_drv.h"
|
||||
|
||||
#include "intel_breadcrumbs.h"
|
||||
#include "intel_context.h"
|
||||
#include "intel_engine.h"
|
||||
#include "intel_engine_pm.h"
|
||||
@ -634,7 +635,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
|
||||
else
|
||||
flags = PIN_HIGH;
|
||||
|
||||
return i915_ggtt_pin(vma, 0, flags);
|
||||
return i915_ggtt_pin(vma, NULL, 0, flags);
|
||||
}
|
||||
|
||||
static int init_status_page(struct intel_engine_cs *engine)
|
||||
@ -700,8 +701,13 @@ static int engine_setup_common(struct intel_engine_cs *engine)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
engine->breadcrumbs = intel_breadcrumbs_create(engine);
|
||||
if (!engine->breadcrumbs) {
|
||||
err = -ENOMEM;
|
||||
goto err_status;
|
||||
}
|
||||
|
||||
intel_engine_init_active(engine, ENGINE_PHYSICAL);
|
||||
intel_engine_init_breadcrumbs(engine);
|
||||
intel_engine_init_execlists(engine);
|
||||
intel_engine_init_cmd_parser(engine);
|
||||
intel_engine_init__pm(engine);
|
||||
@ -716,6 +722,10 @@ static int engine_setup_common(struct intel_engine_cs *engine)
|
||||
intel_engine_init_ctx_wa(engine);
|
||||
|
||||
return 0;
|
||||
|
||||
err_status:
|
||||
cleanup_status_page(engine);
|
||||
return err;
|
||||
}
|
||||
|
||||
struct measure_breadcrumb {
|
||||
@ -785,9 +795,11 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
|
||||
}
|
||||
|
||||
static struct intel_context *
|
||||
create_kernel_context(struct intel_engine_cs *engine)
|
||||
create_pinned_context(struct intel_engine_cs *engine,
|
||||
unsigned int hwsp,
|
||||
struct lock_class_key *key,
|
||||
const char *name)
|
||||
{
|
||||
static struct lock_class_key kernel;
|
||||
struct intel_context *ce;
|
||||
int err;
|
||||
|
||||
@ -796,6 +808,7 @@ create_kernel_context(struct intel_engine_cs *engine)
|
||||
return ce;
|
||||
|
||||
__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
|
||||
ce->timeline = page_pack_bits(NULL, hwsp);
|
||||
|
||||
err = intel_context_pin(ce); /* perma-pin so it is always available */
|
||||
if (err) {
|
||||
@ -809,11 +822,20 @@ create_kernel_context(struct intel_engine_cs *engine)
|
||||
* should we need to inject GPU operations during their request
|
||||
* construction.
|
||||
*/
|
||||
lockdep_set_class(&ce->timeline->mutex, &kernel);
|
||||
lockdep_set_class_and_name(&ce->timeline->mutex, key, name);
|
||||
|
||||
return ce;
|
||||
}
|
||||
|
||||
static struct intel_context *
|
||||
create_kernel_context(struct intel_engine_cs *engine)
|
||||
{
|
||||
static struct lock_class_key kernel;
|
||||
|
||||
return create_pinned_context(engine, I915_GEM_HWS_SEQNO_ADDR,
|
||||
&kernel, "kernel_context");
|
||||
}
|
||||
|
||||
/**
|
||||
* intel_engines_init_common - initialize cengine state which might require hw access
|
||||
* @engine: Engine to initialize.
|
||||
@ -902,9 +924,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
|
||||
tasklet_kill(&engine->execlists.tasklet); /* flush the callback */
|
||||
|
||||
cleanup_status_page(engine);
|
||||
intel_breadcrumbs_free(engine->breadcrumbs);
|
||||
|
||||
intel_engine_fini_retire(engine);
|
||||
intel_engine_fini_breadcrumbs(engine);
|
||||
intel_engine_cleanup_cmd_parser(engine);
|
||||
|
||||
if (engine->default_state)
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include "i915_drv.h"
|
||||
|
||||
#include "intel_breadcrumbs.h"
|
||||
#include "intel_context.h"
|
||||
#include "intel_engine.h"
|
||||
#include "intel_engine_heartbeat.h"
|
||||
@ -247,7 +248,7 @@ static int __engine_park(struct intel_wakeref *wf)
|
||||
call_idle_barriers(engine); /* cleanup after wedging */
|
||||
|
||||
intel_engine_park_heartbeat(engine);
|
||||
intel_engine_disarm_breadcrumbs(engine);
|
||||
intel_breadcrumbs_park(engine->breadcrumbs);
|
||||
|
||||
/* Must be reset upon idling, or we may miss the busy wakeup. */
|
||||
GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "i915_pmu.h"
|
||||
#include "i915_priolist_types.h"
|
||||
#include "i915_selftest.h"
|
||||
#include "intel_breadcrumbs_types.h"
|
||||
#include "intel_sseu.h"
|
||||
#include "intel_timeline_types.h"
|
||||
#include "intel_uncore.h"
|
||||
@ -373,34 +374,8 @@ struct intel_engine_cs {
|
||||
*/
|
||||
struct ewma__engine_latency latency;
|
||||
|
||||
/* Rather than have every client wait upon all user interrupts,
|
||||
* with the herd waking after every interrupt and each doing the
|
||||
* heavyweight seqno dance, we delegate the task (of being the
|
||||
* bottom-half of the user interrupt) to the first client. After
|
||||
* every interrupt, we wake up one client, who does the heavyweight
|
||||
* coherent seqno read and either goes back to sleep (if incomplete),
|
||||
* or wakes up all the completed clients in parallel, before then
|
||||
* transferring the bottom-half status to the next client in the queue.
|
||||
*
|
||||
* Compared to walking the entire list of waiters in a single dedicated
|
||||
* bottom-half, we reduce the latency of the first waiter by avoiding
|
||||
* a context switch, but incur additional coherent seqno reads when
|
||||
* following the chain of request breadcrumbs. Since it is most likely
|
||||
* that we have a single client waiting on each seqno, then reducing
|
||||
* the overhead of waking that client is much preferred.
|
||||
*/
|
||||
struct intel_breadcrumbs {
|
||||
spinlock_t irq_lock;
|
||||
struct list_head signalers;
|
||||
|
||||
struct list_head signaled_requests;
|
||||
|
||||
struct irq_work irq_work; /* for use from inside irq_lock */
|
||||
|
||||
unsigned int irq_enabled;
|
||||
|
||||
bool irq_armed;
|
||||
} breadcrumbs;
|
||||
/* Keep track of all the seqno used, a trail of breadcrumbs */
|
||||
struct intel_breadcrumbs *breadcrumbs;
|
||||
|
||||
struct intel_engine_pmu {
|
||||
/**
|
||||
|
@ -78,8 +78,6 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
|
||||
{
|
||||
int ret;
|
||||
|
||||
stash_init(&i915->mm.wc_stash);
|
||||
|
||||
/*
|
||||
* Note that we use page colouring to enforce a guard page at the
|
||||
* end of the address space. This is required as the CS may prefetch
|
||||
@ -232,7 +230,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
|
||||
|
||||
/* Fill the allocated but "unused" space beyond the end of the buffer */
|
||||
while (gte < end)
|
||||
gen8_set_pte(gte++, vm->scratch[0].encode);
|
||||
gen8_set_pte(gte++, vm->scratch[0]->encode);
|
||||
|
||||
/*
|
||||
* We want to flush the TLBs only after we're certain all the PTE
|
||||
@ -283,7 +281,7 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
|
||||
|
||||
/* Fill the allocated but "unused" space beyond the end of the buffer */
|
||||
while (gte < end)
|
||||
iowrite32(vm->scratch[0].encode, gte++);
|
||||
iowrite32(vm->scratch[0]->encode, gte++);
|
||||
|
||||
/*
|
||||
* We want to flush the TLBs only after we're certain all the PTE
|
||||
@ -303,7 +301,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
|
||||
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
|
||||
unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
|
||||
unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
|
||||
const gen8_pte_t scratch_pte = vm->scratch[0].encode;
|
||||
const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
|
||||
gen8_pte_t __iomem *gtt_base =
|
||||
(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
|
||||
const int max_entries = ggtt_total_entries(ggtt) - first_entry;
|
||||
@ -401,7 +399,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
|
||||
first_entry, num_entries, max_entries))
|
||||
num_entries = max_entries;
|
||||
|
||||
scratch_pte = vm->scratch[0].encode;
|
||||
scratch_pte = vm->scratch[0]->encode;
|
||||
for (i = 0; i < num_entries; i++)
|
||||
iowrite32(scratch_pte, >t_base[i]);
|
||||
}
|
||||
@ -436,16 +434,17 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
|
||||
intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
static int ggtt_bind_vma(struct i915_address_space *vm,
|
||||
struct i915_vma *vma,
|
||||
enum i915_cache_level cache_level,
|
||||
u32 flags)
|
||||
static void ggtt_bind_vma(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash,
|
||||
struct i915_vma *vma,
|
||||
enum i915_cache_level cache_level,
|
||||
u32 flags)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = vma->obj;
|
||||
u32 pte_flags;
|
||||
|
||||
if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
|
||||
return 0;
|
||||
return;
|
||||
|
||||
/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
|
||||
pte_flags = 0;
|
||||
@ -454,8 +453,6 @@ static int ggtt_bind_vma(struct i915_address_space *vm,
|
||||
|
||||
vm->insert_entries(vm, vma, cache_level, pte_flags);
|
||||
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
|
||||
@ -568,31 +565,25 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int aliasing_gtt_bind_vma(struct i915_address_space *vm,
|
||||
struct i915_vma *vma,
|
||||
enum i915_cache_level cache_level,
|
||||
u32 flags)
|
||||
static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash,
|
||||
struct i915_vma *vma,
|
||||
enum i915_cache_level cache_level,
|
||||
u32 flags)
|
||||
{
|
||||
u32 pte_flags;
|
||||
int ret;
|
||||
|
||||
/* Currently applicable only to VLV */
|
||||
pte_flags = 0;
|
||||
if (i915_gem_object_is_readonly(vma->obj))
|
||||
pte_flags |= PTE_READ_ONLY;
|
||||
|
||||
if (flags & I915_VMA_LOCAL_BIND) {
|
||||
struct i915_ppgtt *alias = i915_vm_to_ggtt(vm)->alias;
|
||||
|
||||
ret = ppgtt_bind_vma(&alias->vm, vma, cache_level, flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
if (flags & I915_VMA_LOCAL_BIND)
|
||||
ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
|
||||
stash, vma, cache_level, flags);
|
||||
|
||||
if (flags & I915_VMA_GLOBAL_BIND)
|
||||
vm->insert_entries(vm, vma, cache_level, pte_flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
|
||||
@ -607,6 +598,7 @@ static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
|
||||
|
||||
static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
|
||||
{
|
||||
struct i915_vm_pt_stash stash = {};
|
||||
struct i915_ppgtt *ppgtt;
|
||||
int err;
|
||||
|
||||
@ -619,15 +611,21 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
|
||||
goto err_ppgtt;
|
||||
}
|
||||
|
||||
err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
|
||||
if (err)
|
||||
goto err_ppgtt;
|
||||
|
||||
err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
|
||||
if (err)
|
||||
goto err_stash;
|
||||
|
||||
/*
|
||||
* Note we only pre-allocate as far as the end of the global
|
||||
* GTT. On 48b / 4-level page-tables, the difference is very,
|
||||
* very significant! We have to preallocate as GVT/vgpu does
|
||||
* not like the page directory disappearing.
|
||||
*/
|
||||
err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
|
||||
if (err)
|
||||
goto err_ppgtt;
|
||||
ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total);
|
||||
|
||||
ggtt->alias = ppgtt;
|
||||
ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
|
||||
@ -638,8 +636,11 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
|
||||
GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
|
||||
ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
|
||||
|
||||
i915_vm_free_pt_stash(&ppgtt->vm, &stash);
|
||||
return 0;
|
||||
|
||||
err_stash:
|
||||
i915_vm_free_pt_stash(&ppgtt->vm, &stash);
|
||||
err_ppgtt:
|
||||
i915_vm_put(&ppgtt->vm);
|
||||
return err;
|
||||
@ -715,18 +716,11 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
|
||||
void i915_ggtt_driver_release(struct drm_i915_private *i915)
|
||||
{
|
||||
struct i915_ggtt *ggtt = &i915->ggtt;
|
||||
struct pagevec *pvec;
|
||||
|
||||
fini_aliasing_ppgtt(ggtt);
|
||||
|
||||
intel_ggtt_fini_fences(ggtt);
|
||||
ggtt_cleanup_hw(ggtt);
|
||||
|
||||
pvec = &i915->mm.wc_stash.pvec;
|
||||
if (pvec->nr) {
|
||||
set_pages_array_wb(pvec->pages, pvec->nr);
|
||||
__pagevec_release(pvec);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
|
||||
@ -789,7 +783,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
|
||||
ret = setup_scratch_page(&ggtt->vm);
|
||||
if (ret) {
|
||||
drm_err(&i915->drm, "Scratch setup failed\n");
|
||||
/* iounmap will also get called at remove, but meh */
|
||||
@ -797,8 +791,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ggtt->vm.scratch[0].encode =
|
||||
ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
|
||||
ggtt->vm.scratch[0]->encode =
|
||||
ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
|
||||
I915_CACHE_NONE, 0);
|
||||
|
||||
return 0;
|
||||
@ -824,7 +818,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
|
||||
struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
|
||||
|
||||
iounmap(ggtt->gsm);
|
||||
cleanup_scratch_page(vm);
|
||||
free_scratch(vm);
|
||||
}
|
||||
|
||||
static struct resource pci_resource(struct pci_dev *pdev, int bar)
|
||||
@ -852,6 +846,8 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
|
||||
else
|
||||
size = gen8_get_total_gtt_size(snb_gmch_ctl);
|
||||
|
||||
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
|
||||
|
||||
ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
|
||||
ggtt->vm.cleanup = gen6_gmch_remove;
|
||||
ggtt->vm.insert_page = gen8_ggtt_insert_page;
|
||||
@ -1000,6 +996,8 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
|
||||
size = gen6_get_total_gtt_size(snb_gmch_ctl);
|
||||
ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
|
||||
|
||||
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
|
||||
|
||||
ggtt->vm.clear_range = nop_clear_range;
|
||||
if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
|
||||
ggtt->vm.clear_range = gen6_ggtt_clear_range;
|
||||
@ -1050,6 +1048,8 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
|
||||
ggtt->gmadr =
|
||||
(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
|
||||
|
||||
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
|
||||
|
||||
ggtt->do_idle_maps = needs_idle_maps(i915);
|
||||
ggtt->vm.insert_page = i915_ggtt_insert_page;
|
||||
ggtt->vm.insert_entries = i915_ggtt_insert_entries;
|
||||
@ -1165,11 +1165,6 @@ void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
|
||||
ggtt->invalidate(ggtt);
|
||||
}
|
||||
|
||||
static unsigned int clear_bind(struct i915_vma *vma)
|
||||
{
|
||||
return atomic_fetch_and(~I915_VMA_BIND_MASK, &vma->flags);
|
||||
}
|
||||
|
||||
void i915_ggtt_resume(struct i915_ggtt *ggtt)
|
||||
{
|
||||
struct i915_vma *vma;
|
||||
@ -1187,11 +1182,13 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
|
||||
/* clflush objects bound into the GGTT and rebind them. */
|
||||
list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
|
||||
struct drm_i915_gem_object *obj = vma->obj;
|
||||
unsigned int was_bound = clear_bind(vma);
|
||||
unsigned int was_bound =
|
||||
atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
|
||||
|
||||
WARN_ON(i915_vma_bind(vma,
|
||||
obj ? obj->cache_level : 0,
|
||||
was_bound, NULL));
|
||||
GEM_BUG_ON(!was_bound);
|
||||
vma->ops->bind_vma(&ggtt->vm, NULL, vma,
|
||||
obj ? obj->cache_level : 0,
|
||||
was_bound);
|
||||
if (obj) { /* only used during resume => exclusive access */
|
||||
flush |= fetch_and_zero(&obj->write_domain);
|
||||
obj->read_domains |= I915_GEM_DOMAIN_GTT;
|
||||
|
@ -356,7 +356,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
|
||||
goto err_unref;
|
||||
}
|
||||
|
||||
ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
|
||||
ret = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
|
||||
if (ret)
|
||||
goto err_unref;
|
||||
|
||||
@ -406,21 +406,20 @@ static int __engines_record_defaults(struct intel_gt *gt)
|
||||
/* We must be able to switch to something! */
|
||||
GEM_BUG_ON(!engine->kernel_context);
|
||||
|
||||
err = intel_renderstate_init(&so, engine);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
ce = intel_context_create(engine);
|
||||
if (IS_ERR(ce)) {
|
||||
err = PTR_ERR(ce);
|
||||
goto out;
|
||||
}
|
||||
|
||||
rq = intel_context_create_request(ce);
|
||||
err = intel_renderstate_init(&so, ce);
|
||||
if (err)
|
||||
goto err;
|
||||
|
||||
rq = i915_request_create(ce);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
intel_context_put(ce);
|
||||
goto out;
|
||||
goto err_fini;
|
||||
}
|
||||
|
||||
err = intel_engine_emit_ctx_wa(rq);
|
||||
@ -434,9 +433,13 @@ static int __engines_record_defaults(struct intel_gt *gt)
|
||||
err_rq:
|
||||
requests[id] = i915_request_get(rq);
|
||||
i915_request_add(rq);
|
||||
intel_renderstate_fini(&so);
|
||||
if (err)
|
||||
err_fini:
|
||||
intel_renderstate_fini(&so, ce);
|
||||
err:
|
||||
if (err) {
|
||||
intel_context_put(ce);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* Flush the default context image to memory, and enable powersaving. */
|
||||
|
@ -35,39 +35,65 @@ static void node_free(struct intel_gt_buffer_pool_node *node)
|
||||
{
|
||||
i915_gem_object_put(node->obj);
|
||||
i915_active_fini(&node->active);
|
||||
kfree(node);
|
||||
kfree_rcu(node, rcu);
|
||||
}
|
||||
|
||||
static bool pool_free_older_than(struct intel_gt_buffer_pool *pool, long keep)
|
||||
{
|
||||
struct intel_gt_buffer_pool_node *node, *stale = NULL;
|
||||
bool active = false;
|
||||
int n;
|
||||
|
||||
/* Free buffers that have not been used in the past second */
|
||||
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
|
||||
struct list_head *list = &pool->cache_list[n];
|
||||
|
||||
if (list_empty(list))
|
||||
continue;
|
||||
|
||||
if (spin_trylock_irq(&pool->lock)) {
|
||||
struct list_head *pos;
|
||||
|
||||
/* Most recent at head; oldest at tail */
|
||||
list_for_each_prev(pos, list) {
|
||||
unsigned long age;
|
||||
|
||||
node = list_entry(pos, typeof(*node), link);
|
||||
|
||||
age = READ_ONCE(node->age);
|
||||
if (!age || jiffies - age < keep)
|
||||
break;
|
||||
|
||||
/* Check we are the first to claim this node */
|
||||
if (!xchg(&node->age, 0))
|
||||
break;
|
||||
|
||||
node->free = stale;
|
||||
stale = node;
|
||||
}
|
||||
if (!list_is_last(pos, list))
|
||||
__list_del_many(pos, list);
|
||||
|
||||
spin_unlock_irq(&pool->lock);
|
||||
}
|
||||
|
||||
active |= !list_empty(list);
|
||||
}
|
||||
|
||||
while ((node = stale)) {
|
||||
stale = stale->free;
|
||||
node_free(node);
|
||||
}
|
||||
|
||||
return active;
|
||||
}
|
||||
|
||||
static void pool_free_work(struct work_struct *wrk)
|
||||
{
|
||||
struct intel_gt_buffer_pool *pool =
|
||||
container_of(wrk, typeof(*pool), work.work);
|
||||
struct intel_gt_buffer_pool_node *node, *next;
|
||||
unsigned long old = jiffies - HZ;
|
||||
bool active = false;
|
||||
LIST_HEAD(stale);
|
||||
int n;
|
||||
|
||||
/* Free buffers that have not been used in the past second */
|
||||
spin_lock_irq(&pool->lock);
|
||||
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
|
||||
struct list_head *list = &pool->cache_list[n];
|
||||
|
||||
/* Most recent at head; oldest at tail */
|
||||
list_for_each_entry_safe_reverse(node, next, list, link) {
|
||||
if (time_before(node->age, old))
|
||||
break;
|
||||
|
||||
list_move(&node->link, &stale);
|
||||
}
|
||||
active |= !list_empty(list);
|
||||
}
|
||||
spin_unlock_irq(&pool->lock);
|
||||
|
||||
list_for_each_entry_safe(node, next, &stale, link)
|
||||
node_free(node);
|
||||
|
||||
if (active)
|
||||
if (pool_free_older_than(pool, HZ))
|
||||
schedule_delayed_work(&pool->work,
|
||||
round_jiffies_up_relative(HZ));
|
||||
}
|
||||
@ -109,8 +135,8 @@ static void pool_retire(struct i915_active *ref)
|
||||
i915_gem_object_make_purgeable(node->obj);
|
||||
|
||||
spin_lock_irqsave(&pool->lock, flags);
|
||||
node->age = jiffies;
|
||||
list_add(&node->link, list);
|
||||
list_add_rcu(&node->link, list);
|
||||
WRITE_ONCE(node->age, jiffies ?: 1); /* 0 reserved for active nodes */
|
||||
spin_unlock_irqrestore(&pool->lock, flags);
|
||||
|
||||
schedule_delayed_work(&pool->work,
|
||||
@ -151,20 +177,30 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
|
||||
struct intel_gt_buffer_pool *pool = >->buffer_pool;
|
||||
struct intel_gt_buffer_pool_node *node;
|
||||
struct list_head *list;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
size = PAGE_ALIGN(size);
|
||||
list = bucket_for_size(pool, size);
|
||||
|
||||
spin_lock_irqsave(&pool->lock, flags);
|
||||
list_for_each_entry(node, list, link) {
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(node, list, link) {
|
||||
unsigned long age;
|
||||
|
||||
if (node->obj->base.size < size)
|
||||
continue;
|
||||
list_del(&node->link);
|
||||
break;
|
||||
|
||||
age = READ_ONCE(node->age);
|
||||
if (!age)
|
||||
continue;
|
||||
|
||||
if (cmpxchg(&node->age, age, 0) == age) {
|
||||
spin_lock_irq(&pool->lock);
|
||||
list_del_rcu(&node->link);
|
||||
spin_unlock_irq(&pool->lock);
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&pool->lock, flags);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (&node->link == list) {
|
||||
node = node_create(pool, size);
|
||||
@ -192,28 +228,13 @@ void intel_gt_init_buffer_pool(struct intel_gt *gt)
|
||||
INIT_DELAYED_WORK(&pool->work, pool_free_work);
|
||||
}
|
||||
|
||||
static void pool_free_imm(struct intel_gt_buffer_pool *pool)
|
||||
{
|
||||
int n;
|
||||
|
||||
spin_lock_irq(&pool->lock);
|
||||
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
|
||||
struct intel_gt_buffer_pool_node *node, *next;
|
||||
struct list_head *list = &pool->cache_list[n];
|
||||
|
||||
list_for_each_entry_safe(node, next, list, link)
|
||||
node_free(node);
|
||||
INIT_LIST_HEAD(list);
|
||||
}
|
||||
spin_unlock_irq(&pool->lock);
|
||||
}
|
||||
|
||||
void intel_gt_flush_buffer_pool(struct intel_gt *gt)
|
||||
{
|
||||
struct intel_gt_buffer_pool *pool = >->buffer_pool;
|
||||
|
||||
do {
|
||||
pool_free_imm(pool);
|
||||
while (pool_free_older_than(pool, 0))
|
||||
;
|
||||
} while (cancel_delayed_work_sync(&pool->work));
|
||||
}
|
||||
|
||||
|
@ -25,7 +25,11 @@ struct intel_gt_buffer_pool_node {
|
||||
struct i915_active active;
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct list_head link;
|
||||
struct intel_gt_buffer_pool *pool;
|
||||
union {
|
||||
struct intel_gt_buffer_pool *pool;
|
||||
struct intel_gt_buffer_pool_node *free;
|
||||
struct rcu_head rcu;
|
||||
};
|
||||
unsigned long age;
|
||||
};
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_irq.h"
|
||||
#include "intel_breadcrumbs.h"
|
||||
#include "intel_gt.h"
|
||||
#include "intel_gt_irq.h"
|
||||
#include "intel_uncore.h"
|
||||
|
@ -11,160 +11,24 @@
|
||||
#include "intel_gt.h"
|
||||
#include "intel_gtt.h"
|
||||
|
||||
void stash_init(struct pagestash *stash)
|
||||
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
|
||||
{
|
||||
pagevec_init(&stash->pvec);
|
||||
spin_lock_init(&stash->lock);
|
||||
}
|
||||
|
||||
static struct page *stash_pop_page(struct pagestash *stash)
|
||||
{
|
||||
struct page *page = NULL;
|
||||
|
||||
spin_lock(&stash->lock);
|
||||
if (likely(stash->pvec.nr))
|
||||
page = stash->pvec.pages[--stash->pvec.nr];
|
||||
spin_unlock(&stash->lock);
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
|
||||
{
|
||||
unsigned int nr;
|
||||
|
||||
spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
|
||||
|
||||
nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
|
||||
memcpy(stash->pvec.pages + stash->pvec.nr,
|
||||
pvec->pages + pvec->nr - nr,
|
||||
sizeof(pvec->pages[0]) * nr);
|
||||
stash->pvec.nr += nr;
|
||||
|
||||
spin_unlock(&stash->lock);
|
||||
|
||||
pvec->nr -= nr;
|
||||
}
|
||||
|
||||
static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
|
||||
{
|
||||
struct pagevec stack;
|
||||
struct page *page;
|
||||
|
||||
if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
|
||||
i915_gem_shrink_all(vm->i915);
|
||||
|
||||
page = stash_pop_page(&vm->free_pages);
|
||||
if (page)
|
||||
return page;
|
||||
|
||||
if (!vm->pt_kmap_wc)
|
||||
return alloc_page(gfp);
|
||||
|
||||
/* Look in our global stash of WC pages... */
|
||||
page = stash_pop_page(&vm->i915->mm.wc_stash);
|
||||
if (page)
|
||||
return page;
|
||||
|
||||
/*
|
||||
* Otherwise batch allocate pages to amortize cost of set_pages_wc.
|
||||
*
|
||||
* We have to be careful as page allocation may trigger the shrinker
|
||||
* (via direct reclaim) which will fill up the WC stash underneath us.
|
||||
* So we add our WB pages into a temporary pvec on the stack and merge
|
||||
* them into the WC stash after all the allocations are complete.
|
||||
*/
|
||||
pagevec_init(&stack);
|
||||
do {
|
||||
struct page *page;
|
||||
|
||||
page = alloc_page(gfp);
|
||||
if (unlikely(!page))
|
||||
break;
|
||||
|
||||
stack.pages[stack.nr++] = page;
|
||||
} while (pagevec_space(&stack));
|
||||
|
||||
if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
|
||||
page = stack.pages[--stack.nr];
|
||||
|
||||
/* Merge spare WC pages to the global stash */
|
||||
if (stack.nr)
|
||||
stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
|
||||
|
||||
/* Push any surplus WC pages onto the local VM stash */
|
||||
if (stack.nr)
|
||||
stash_push_pagevec(&vm->free_pages, &stack);
|
||||
}
|
||||
|
||||
/* Return unwanted leftovers */
|
||||
if (unlikely(stack.nr)) {
|
||||
WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
|
||||
__pagevec_release(&stack);
|
||||
}
|
||||
|
||||
return page;
|
||||
return i915_gem_object_create_internal(vm->i915, sz);
|
||||
}
|
||||
|
||||
static void vm_free_pages_release(struct i915_address_space *vm,
|
||||
bool immediate)
|
||||
int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct pagevec *pvec = &vm->free_pages.pvec;
|
||||
struct pagevec stack;
|
||||
int err;
|
||||
|
||||
lockdep_assert_held(&vm->free_pages.lock);
|
||||
GEM_BUG_ON(!pagevec_count(pvec));
|
||||
err = i915_gem_object_pin_pages(obj);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (vm->pt_kmap_wc) {
|
||||
/*
|
||||
* When we use WC, first fill up the global stash and then
|
||||
* only if full immediately free the overflow.
|
||||
*/
|
||||
stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
|
||||
|
||||
/*
|
||||
* As we have made some room in the VM's free_pages,
|
||||
* we can wait for it to fill again. Unless we are
|
||||
* inside i915_address_space_fini() and must
|
||||
* immediately release the pages!
|
||||
*/
|
||||
if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
|
||||
return;
|
||||
|
||||
/*
|
||||
* We have to drop the lock to allow ourselves to sleep,
|
||||
* so take a copy of the pvec and clear the stash for
|
||||
* others to use it as we sleep.
|
||||
*/
|
||||
stack = *pvec;
|
||||
pagevec_reinit(pvec);
|
||||
spin_unlock(&vm->free_pages.lock);
|
||||
|
||||
pvec = &stack;
|
||||
set_pages_array_wb(pvec->pages, pvec->nr);
|
||||
|
||||
spin_lock(&vm->free_pages.lock);
|
||||
}
|
||||
|
||||
__pagevec_release(pvec);
|
||||
}
|
||||
|
||||
static void vm_free_page(struct i915_address_space *vm, struct page *page)
|
||||
{
|
||||
/*
|
||||
* On !llc, we need to change the pages back to WB. We only do so
|
||||
* in bulk, so we rarely need to change the page attributes here,
|
||||
* but doing so requires a stop_machine() from deep inside arch/x86/mm.
|
||||
* To make detection of the possible sleep more likely, use an
|
||||
* unconditional might_sleep() for everybody.
|
||||
*/
|
||||
might_sleep();
|
||||
spin_lock(&vm->free_pages.lock);
|
||||
while (!pagevec_space(&vm->free_pages.pvec))
|
||||
vm_free_pages_release(vm, false);
|
||||
GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
|
||||
pagevec_add(&vm->free_pages.pvec, page);
|
||||
spin_unlock(&vm->free_pages.lock);
|
||||
i915_gem_object_make_unshrinkable(obj);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __i915_vm_close(struct i915_address_space *vm)
|
||||
@ -194,14 +58,7 @@ void __i915_vm_close(struct i915_address_space *vm)
|
||||
|
||||
void i915_address_space_fini(struct i915_address_space *vm)
|
||||
{
|
||||
spin_lock(&vm->free_pages.lock);
|
||||
if (pagevec_count(&vm->free_pages.pvec))
|
||||
vm_free_pages_release(vm, true);
|
||||
GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
|
||||
spin_unlock(&vm->free_pages.lock);
|
||||
|
||||
drm_mm_takedown(&vm->mm);
|
||||
|
||||
mutex_destroy(&vm->mutex);
|
||||
}
|
||||
|
||||
@ -246,8 +103,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
|
||||
drm_mm_init(&vm->mm, 0, vm->total);
|
||||
vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
|
||||
|
||||
stash_init(&vm->free_pages);
|
||||
|
||||
INIT_LIST_HEAD(&vm->bound_list);
|
||||
}
|
||||
|
||||
@ -264,64 +119,50 @@ void clear_pages(struct i915_vma *vma)
|
||||
memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
|
||||
}
|
||||
|
||||
static int __setup_page_dma(struct i915_address_space *vm,
|
||||
struct i915_page_dma *p,
|
||||
gfp_t gfp)
|
||||
dma_addr_t __px_dma(struct drm_i915_gem_object *p)
|
||||
{
|
||||
p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
|
||||
if (unlikely(!p->page))
|
||||
return -ENOMEM;
|
||||
|
||||
p->daddr = dma_map_page_attrs(vm->dma,
|
||||
p->page, 0, PAGE_SIZE,
|
||||
PCI_DMA_BIDIRECTIONAL,
|
||||
DMA_ATTR_SKIP_CPU_SYNC |
|
||||
DMA_ATTR_NO_WARN);
|
||||
if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
|
||||
vm_free_page(vm, p->page);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
GEM_BUG_ON(!i915_gem_object_has_pages(p));
|
||||
return sg_dma_address(p->mm.pages->sgl);
|
||||
}
|
||||
|
||||
int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
|
||||
struct page *__px_page(struct drm_i915_gem_object *p)
|
||||
{
|
||||
return __setup_page_dma(vm, p, __GFP_HIGHMEM);
|
||||
}
|
||||
|
||||
void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
|
||||
{
|
||||
dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
|
||||
vm_free_page(vm, p->page);
|
||||
GEM_BUG_ON(!i915_gem_object_has_pages(p));
|
||||
return sg_page(p->mm.pages->sgl);
|
||||
}
|
||||
|
||||
void
|
||||
fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
|
||||
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count)
|
||||
{
|
||||
kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
|
||||
struct page *page = __px_page(p);
|
||||
void *vaddr;
|
||||
|
||||
vaddr = kmap(page);
|
||||
memset64(vaddr, val, count);
|
||||
clflush_cache_range(vaddr, PAGE_SIZE);
|
||||
kunmap(page);
|
||||
}
|
||||
|
||||
static void poison_scratch_page(struct page *page, unsigned long size)
|
||||
static void poison_scratch_page(struct drm_i915_gem_object *scratch)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
|
||||
return;
|
||||
struct sgt_iter sgt;
|
||||
struct page *page;
|
||||
u8 val;
|
||||
|
||||
GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
|
||||
val = 0;
|
||||
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
|
||||
val = POISON_FREE;
|
||||
|
||||
do {
|
||||
for_each_sgt_page(page, sgt, scratch->mm.pages) {
|
||||
void *vaddr;
|
||||
|
||||
vaddr = kmap(page);
|
||||
memset(vaddr, POISON_FREE, PAGE_SIZE);
|
||||
memset(vaddr, val, PAGE_SIZE);
|
||||
kunmap(page);
|
||||
|
||||
page = pfn_to_page(page_to_pfn(page) + 1);
|
||||
size -= PAGE_SIZE;
|
||||
} while (size);
|
||||
}
|
||||
}
|
||||
|
||||
int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
|
||||
int setup_scratch_page(struct i915_address_space *vm)
|
||||
{
|
||||
unsigned long size;
|
||||
|
||||
@ -338,21 +179,27 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
|
||||
*/
|
||||
size = I915_GTT_PAGE_SIZE_4K;
|
||||
if (i915_vm_is_4lvl(vm) &&
|
||||
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
|
||||
HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
|
||||
size = I915_GTT_PAGE_SIZE_64K;
|
||||
gfp |= __GFP_NOWARN;
|
||||
}
|
||||
gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
|
||||
|
||||
do {
|
||||
unsigned int order = get_order(size);
|
||||
struct page *page;
|
||||
dma_addr_t addr;
|
||||
struct drm_i915_gem_object *obj;
|
||||
|
||||
page = alloc_pages(gfp, order);
|
||||
if (unlikely(!page))
|
||||
obj = vm->alloc_pt_dma(vm, size);
|
||||
if (IS_ERR(obj))
|
||||
goto skip;
|
||||
|
||||
if (pin_pt_dma(vm, obj))
|
||||
goto skip_obj;
|
||||
|
||||
/* We need a single contiguous page for our scratch */
|
||||
if (obj->mm.page_sizes.sg < size)
|
||||
goto skip_obj;
|
||||
|
||||
/* And it needs to be correspondingly aligned */
|
||||
if (__px_dma(obj) & (size - 1))
|
||||
goto skip_obj;
|
||||
|
||||
/*
|
||||
* Use a non-zero scratch page for debugging.
|
||||
*
|
||||
@ -362,61 +209,28 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
|
||||
* should it ever be accidentally used, the effect should be
|
||||
* fairly benign.
|
||||
*/
|
||||
poison_scratch_page(page, size);
|
||||
poison_scratch_page(obj);
|
||||
|
||||
addr = dma_map_page_attrs(vm->dma,
|
||||
page, 0, size,
|
||||
PCI_DMA_BIDIRECTIONAL,
|
||||
DMA_ATTR_SKIP_CPU_SYNC |
|
||||
DMA_ATTR_NO_WARN);
|
||||
if (unlikely(dma_mapping_error(vm->dma, addr)))
|
||||
goto free_page;
|
||||
|
||||
if (unlikely(!IS_ALIGNED(addr, size)))
|
||||
goto unmap_page;
|
||||
|
||||
vm->scratch[0].base.page = page;
|
||||
vm->scratch[0].base.daddr = addr;
|
||||
vm->scratch_order = order;
|
||||
vm->scratch[0] = obj;
|
||||
vm->scratch_order = get_order(size);
|
||||
return 0;
|
||||
|
||||
unmap_page:
|
||||
dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
|
||||
free_page:
|
||||
__free_pages(page, order);
|
||||
skip_obj:
|
||||
i915_gem_object_put(obj);
|
||||
skip:
|
||||
if (size == I915_GTT_PAGE_SIZE_4K)
|
||||
return -ENOMEM;
|
||||
|
||||
size = I915_GTT_PAGE_SIZE_4K;
|
||||
gfp &= ~__GFP_NOWARN;
|
||||
} while (1);
|
||||
}
|
||||
|
||||
void cleanup_scratch_page(struct i915_address_space *vm)
|
||||
{
|
||||
struct i915_page_dma *p = px_base(&vm->scratch[0]);
|
||||
unsigned int order = vm->scratch_order;
|
||||
|
||||
dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
|
||||
PCI_DMA_BIDIRECTIONAL);
|
||||
__free_pages(p->page, order);
|
||||
}
|
||||
|
||||
void free_scratch(struct i915_address_space *vm)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
|
||||
return;
|
||||
|
||||
for (i = 1; i <= vm->top; i++) {
|
||||
if (!px_dma(&vm->scratch[i]))
|
||||
break;
|
||||
cleanup_page_dma(vm, px_base(&vm->scratch[i]));
|
||||
}
|
||||
|
||||
cleanup_scratch_page(vm);
|
||||
for (i = 0; i <= vm->top; i++)
|
||||
i915_gem_object_put(vm->scratch[i]);
|
||||
}
|
||||
|
||||
void gtt_write_workarounds(struct intel_gt *gt)
|
||||
|
@ -134,38 +134,29 @@ typedef u64 gen8_pte_t;
|
||||
#define GEN8_PDE_IPS_64K BIT(11)
|
||||
#define GEN8_PDE_PS_2M BIT(7)
|
||||
|
||||
enum i915_cache_level;
|
||||
|
||||
struct drm_i915_file_private;
|
||||
struct drm_i915_gem_object;
|
||||
struct i915_fence_reg;
|
||||
struct i915_vma;
|
||||
struct intel_gt;
|
||||
|
||||
#define for_each_sgt_daddr(__dp, __iter, __sgt) \
|
||||
__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
|
||||
|
||||
struct i915_page_dma {
|
||||
struct page *page;
|
||||
union {
|
||||
dma_addr_t daddr;
|
||||
|
||||
/*
|
||||
* For gen6/gen7 only. This is the offset in the GGTT
|
||||
* where the page directory entries for PPGTT begin
|
||||
*/
|
||||
u32 ggtt_offset;
|
||||
};
|
||||
};
|
||||
|
||||
struct i915_page_scratch {
|
||||
struct i915_page_dma base;
|
||||
u64 encode;
|
||||
};
|
||||
|
||||
struct i915_page_table {
|
||||
struct i915_page_dma base;
|
||||
atomic_t used;
|
||||
struct drm_i915_gem_object *base;
|
||||
union {
|
||||
atomic_t used;
|
||||
struct i915_page_table *stash;
|
||||
};
|
||||
};
|
||||
|
||||
struct i915_page_directory {
|
||||
struct i915_page_table pt;
|
||||
spinlock_t lock;
|
||||
void *entry[512];
|
||||
void **entry;
|
||||
};
|
||||
|
||||
#define __px_choose_expr(x, type, expr, other) \
|
||||
@ -176,12 +167,14 @@ struct i915_page_directory {
|
||||
other)
|
||||
|
||||
#define px_base(px) \
|
||||
__px_choose_expr(px, struct i915_page_dma *, __x, \
|
||||
__px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
|
||||
__px_choose_expr(px, struct i915_page_table *, &__x->base, \
|
||||
__px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
|
||||
(void)0))))
|
||||
#define px_dma(px) (px_base(px)->daddr)
|
||||
__px_choose_expr(px, struct drm_i915_gem_object *, __x, \
|
||||
__px_choose_expr(px, struct i915_page_table *, __x->base, \
|
||||
__px_choose_expr(px, struct i915_page_directory *, __x->pt.base, \
|
||||
(void)0)))
|
||||
|
||||
struct page *__px_page(struct drm_i915_gem_object *p);
|
||||
dma_addr_t __px_dma(struct drm_i915_gem_object *p);
|
||||
#define px_dma(px) (__px_dma(px_base(px)))
|
||||
|
||||
#define px_pt(px) \
|
||||
__px_choose_expr(px, struct i915_page_table *, __x, \
|
||||
@ -189,19 +182,18 @@ struct i915_page_directory {
|
||||
(void)0))
|
||||
#define px_used(px) (&px_pt(px)->used)
|
||||
|
||||
enum i915_cache_level;
|
||||
|
||||
struct drm_i915_file_private;
|
||||
struct drm_i915_gem_object;
|
||||
struct i915_vma;
|
||||
struct intel_gt;
|
||||
struct i915_vm_pt_stash {
|
||||
/* preallocated chains of page tables/directories */
|
||||
struct i915_page_table *pt[2];
|
||||
};
|
||||
|
||||
struct i915_vma_ops {
|
||||
/* Map an object into an address space with the given cache flags. */
|
||||
int (*bind_vma)(struct i915_address_space *vm,
|
||||
struct i915_vma *vma,
|
||||
enum i915_cache_level cache_level,
|
||||
u32 flags);
|
||||
void (*bind_vma)(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash,
|
||||
struct i915_vma *vma,
|
||||
enum i915_cache_level cache_level,
|
||||
u32 flags);
|
||||
/*
|
||||
* Unmap an object from an address space. This usually consists of
|
||||
* setting the valid PTE entries to a reserved scratch page.
|
||||
@ -213,13 +205,6 @@ struct i915_vma_ops {
|
||||
void (*clear_pages)(struct i915_vma *vma);
|
||||
};
|
||||
|
||||
struct pagestash {
|
||||
spinlock_t lock;
|
||||
struct pagevec pvec;
|
||||
};
|
||||
|
||||
void stash_init(struct pagestash *stash);
|
||||
|
||||
struct i915_address_space {
|
||||
struct kref ref;
|
||||
struct rcu_work rcu;
|
||||
@ -256,33 +241,33 @@ struct i915_address_space {
|
||||
#define VM_CLASS_GGTT 0
|
||||
#define VM_CLASS_PPGTT 1
|
||||
|
||||
struct i915_page_scratch scratch[4];
|
||||
unsigned int scratch_order;
|
||||
unsigned int top;
|
||||
|
||||
struct drm_i915_gem_object *scratch[4];
|
||||
/**
|
||||
* List of vma currently bound.
|
||||
*/
|
||||
struct list_head bound_list;
|
||||
|
||||
struct pagestash free_pages;
|
||||
|
||||
/* Global GTT */
|
||||
bool is_ggtt:1;
|
||||
|
||||
/* Some systems require uncached updates of the page directories */
|
||||
bool pt_kmap_wc:1;
|
||||
|
||||
/* Some systems support read-only mappings for GGTT and/or PPGTT */
|
||||
bool has_read_only:1;
|
||||
|
||||
u8 top;
|
||||
u8 pd_shift;
|
||||
u8 scratch_order;
|
||||
|
||||
struct drm_i915_gem_object *
|
||||
(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
|
||||
|
||||
u64 (*pte_encode)(dma_addr_t addr,
|
||||
enum i915_cache_level level,
|
||||
u32 flags); /* Create a valid PTE */
|
||||
#define PTE_READ_ONLY BIT(0)
|
||||
|
||||
int (*allocate_va_range)(struct i915_address_space *vm,
|
||||
u64 start, u64 length);
|
||||
void (*allocate_va_range)(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash,
|
||||
u64 start, u64 length);
|
||||
void (*clear_range)(struct i915_address_space *vm,
|
||||
u64 start, u64 length);
|
||||
void (*insert_page)(struct i915_address_space *vm,
|
||||
@ -490,9 +475,9 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
|
||||
static inline dma_addr_t
|
||||
i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
|
||||
{
|
||||
struct i915_page_dma *pt = ppgtt->pd->entry[n];
|
||||
struct i915_page_table *pt = ppgtt->pd->entry[n];
|
||||
|
||||
return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));
|
||||
return __px_dma(pt ? px_base(pt) : ppgtt->vm.scratch[ppgtt->vm.top]);
|
||||
}
|
||||
|
||||
void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
|
||||
@ -517,13 +502,10 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
|
||||
void i915_ggtt_suspend(struct i915_ggtt *gtt);
|
||||
void i915_ggtt_resume(struct i915_ggtt *ggtt);
|
||||
|
||||
int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
|
||||
void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
|
||||
|
||||
#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
|
||||
#define kmap_atomic_px(px) kmap_atomic(__px_page(px_base(px)))
|
||||
|
||||
void
|
||||
fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
|
||||
fill_page_dma(struct drm_i915_gem_object *p, const u64 val, unsigned int count);
|
||||
|
||||
#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
|
||||
#define fill32_px(px, v) do { \
|
||||
@ -531,47 +513,51 @@ fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
|
||||
fill_px((px), v__ << 32 | v__); \
|
||||
} while (0)
|
||||
|
||||
int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp);
|
||||
void cleanup_scratch_page(struct i915_address_space *vm);
|
||||
int setup_scratch_page(struct i915_address_space *vm);
|
||||
void free_scratch(struct i915_address_space *vm);
|
||||
|
||||
struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz);
|
||||
struct i915_page_table *alloc_pt(struct i915_address_space *vm);
|
||||
struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
|
||||
struct i915_page_directory *__alloc_pd(size_t sz);
|
||||
struct i915_page_directory *__alloc_pd(int npde);
|
||||
|
||||
void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd);
|
||||
int pin_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj);
|
||||
|
||||
#define free_px(vm, px) free_pd(vm, px_base(px))
|
||||
void free_px(struct i915_address_space *vm,
|
||||
struct i915_page_table *pt, int lvl);
|
||||
#define free_pt(vm, px) free_px(vm, px, 0)
|
||||
#define free_pd(vm, px) free_px(vm, px_pt(px), 1)
|
||||
|
||||
void
|
||||
__set_pd_entry(struct i915_page_directory * const pd,
|
||||
const unsigned short idx,
|
||||
struct i915_page_dma * const to,
|
||||
struct i915_page_table *pt,
|
||||
u64 (*encode)(const dma_addr_t, const enum i915_cache_level));
|
||||
|
||||
#define set_pd_entry(pd, idx, to) \
|
||||
__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
|
||||
__set_pd_entry((pd), (idx), px_pt(to), gen8_pde_encode)
|
||||
|
||||
void
|
||||
clear_pd_entry(struct i915_page_directory * const pd,
|
||||
const unsigned short idx,
|
||||
const struct i915_page_scratch * const scratch);
|
||||
const struct drm_i915_gem_object * const scratch);
|
||||
|
||||
bool
|
||||
release_pd_entry(struct i915_page_directory * const pd,
|
||||
const unsigned short idx,
|
||||
struct i915_page_table * const pt,
|
||||
const struct i915_page_scratch * const scratch);
|
||||
const struct drm_i915_gem_object * const scratch);
|
||||
void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
|
||||
|
||||
int ggtt_set_pages(struct i915_vma *vma);
|
||||
int ppgtt_set_pages(struct i915_vma *vma);
|
||||
void clear_pages(struct i915_vma *vma);
|
||||
|
||||
int ppgtt_bind_vma(struct i915_address_space *vm,
|
||||
struct i915_vma *vma,
|
||||
enum i915_cache_level cache_level,
|
||||
u32 flags);
|
||||
void ppgtt_bind_vma(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash,
|
||||
struct i915_vma *vma,
|
||||
enum i915_cache_level cache_level,
|
||||
u32 flags);
|
||||
void ppgtt_unbind_vma(struct i915_address_space *vm,
|
||||
struct i915_vma *vma);
|
||||
|
||||
@ -579,6 +565,14 @@ void gtt_write_workarounds(struct intel_gt *gt);
|
||||
|
||||
void setup_private_pat(struct intel_uncore *uncore);
|
||||
|
||||
int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash,
|
||||
u64 size);
|
||||
int i915_vm_pin_pt_stash(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash);
|
||||
void i915_vm_free_pt_stash(struct i915_address_space *vm,
|
||||
struct i915_vm_pt_stash *stash);
|
||||
|
||||
static inline struct sgt_dma {
|
||||
struct scatterlist *sg;
|
||||
dma_addr_t dma, max;
|
||||
|
@ -137,6 +137,7 @@
|
||||
#include "i915_perf.h"
|
||||
#include "i915_trace.h"
|
||||
#include "i915_vgpu.h"
|
||||
#include "intel_breadcrumbs.h"
|
||||
#include "intel_context.h"
|
||||
#include "intel_engine_pm.h"
|
||||
#include "intel_gt.h"
|
||||
@ -1148,20 +1149,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
|
||||
} else {
|
||||
struct intel_engine_cs *owner = rq->context->engine;
|
||||
|
||||
/*
|
||||
* Decouple the virtual breadcrumb before moving it
|
||||
* back to the virtual engine -- we don't want the
|
||||
* request to complete in the background and try
|
||||
* and cancel the breadcrumb on the virtual engine
|
||||
* (instead of the old engine where it is linked)!
|
||||
*/
|
||||
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
|
||||
&rq->fence.flags)) {
|
||||
spin_lock_nested(&rq->lock,
|
||||
SINGLE_DEPTH_NESTING);
|
||||
i915_request_cancel_breadcrumb(rq);
|
||||
spin_unlock(&rq->lock);
|
||||
}
|
||||
WRITE_ONCE(rq->engine, owner);
|
||||
owner->submit_request(rq);
|
||||
active = NULL;
|
||||
@ -1819,16 +1806,31 @@ static bool virtual_matches(const struct virtual_engine *ve,
|
||||
return true;
|
||||
}
|
||||
|
||||
static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
|
||||
static void virtual_xfer_context(struct virtual_engine *ve,
|
||||
struct intel_engine_cs *engine)
|
||||
{
|
||||
unsigned int n;
|
||||
|
||||
if (likely(engine == ve->siblings[0]))
|
||||
return;
|
||||
|
||||
GEM_BUG_ON(READ_ONCE(ve->context.inflight));
|
||||
if (!intel_engine_has_relative_mmio(engine))
|
||||
virtual_update_register_offsets(ve->context.lrc_reg_state,
|
||||
engine);
|
||||
|
||||
/*
|
||||
* All the outstanding signals on ve->siblings[0] must have
|
||||
* been completed, just pending the interrupt handler. As those
|
||||
* signals still refer to the old sibling (via rq->engine), we must
|
||||
* transfer those to the old irq_worker to keep our locking
|
||||
* consistent.
|
||||
* Move the bound engine to the top of the list for
|
||||
* future execution. We then kick this tasklet first
|
||||
* before checking others, so that we preferentially
|
||||
* reuse this set of bound registers.
|
||||
*/
|
||||
intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
|
||||
for (n = 1; n < ve->num_siblings; n++) {
|
||||
if (ve->siblings[n] == engine) {
|
||||
swap(ve->siblings[n], ve->siblings[0]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define for_each_waiter(p__, rq__) \
|
||||
@ -2060,6 +2062,14 @@ static inline void clear_ports(struct i915_request **ports, int count)
|
||||
memset_p((void **)ports, NULL, count);
|
||||
}
|
||||
|
||||
static inline void
|
||||
copy_ports(struct i915_request **dst, struct i915_request **src, int count)
|
||||
{
|
||||
/* A memcpy_p() would be very useful here! */
|
||||
while (count--)
|
||||
WRITE_ONCE(*dst++, *src++); /* avoid write tearing */
|
||||
}
|
||||
|
||||
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@ -2271,38 +2281,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(!(rq->execution_mask & engine->mask));
WRITE_ONCE(rq->engine, engine);

if (engine != ve->siblings[0]) {
u32 *regs = ve->context.lrc_reg_state;
unsigned int n;

GEM_BUG_ON(READ_ONCE(ve->context.inflight));

if (!intel_engine_has_relative_mmio(engine))
virtual_update_register_offsets(regs,
engine);

if (!list_empty(&ve->context.signals))
virtual_xfer_breadcrumbs(ve);

/*
* Move the bound engine to the top of the list
* for future execution. We then kick this
* tasklet first before checking others, so that
* we preferentially reuse this set of bound
* registers.
*/
for (n = 1; n < ve->num_siblings; n++) {
if (ve->siblings[n] == engine) {
swap(ve->siblings[n],
ve->siblings[0]);
break;
}
}

GEM_BUG_ON(ve->siblings[0] != engine);
}

if (__i915_request_submit(rq)) {
/*
* Only after we confirm that we will submit
* this request (i.e. it has not already
* completed), do we want to update the context.
*
* This serves two purposes. It avoids
* unnecessary work if we are resubmitting an
* already completed request after timeslicing.
* But more importantly, it prevents us altering
* ve->siblings[] on an idle context, where
* we may be using ve->siblings[] in
* virtual_context_enter / virtual_context_exit.
*/
virtual_xfer_context(ve, engine);
GEM_BUG_ON(ve->siblings[0] != engine);

submit = true;
last = rq;
}
@ -2648,10 +2643,9 @@ static void process_csb(struct intel_engine_cs *engine)

/* switch pending to inflight */
GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
memcpy(execlists->inflight,
execlists->pending,
execlists_num_ports(execlists) *
sizeof(*execlists->pending));
copy_ports(execlists->inflight,
execlists->pending,
execlists_num_ports(execlists));
smp_wmb(); /* complete the seqlock */
WRITE_ONCE(execlists->active, execlists->inflight);

@ -3309,7 +3303,10 @@ static void execlists_context_unpin(struct intel_context *ce)
{
check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
ce->engine);
}

static void execlists_context_post_unpin(struct intel_context *ce)
{
i915_gem_object_unpin_map(ce->state->obj);
}

@ -3471,20 +3468,24 @@ __execlists_update_reg_state(const struct intel_context *ce,
}

static int
__execlists_context_pin(struct intel_context *ce,
struct intel_engine_cs *engine)
execlists_context_pre_pin(struct intel_context *ce,
struct i915_gem_ww_ctx *ww, void **vaddr)
{
void *vaddr;

GEM_BUG_ON(!ce->state);
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

vaddr = i915_gem_object_pin_map(ce->state->obj,
i915_coherent_map_type(engine->i915) |
*vaddr = i915_gem_object_pin_map(ce->state->obj,
i915_coherent_map_type(ce->engine->i915) |
I915_MAP_OVERRIDE);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);

return PTR_ERR_OR_ZERO(*vaddr);
}

static int
__execlists_context_pin(struct intel_context *ce,
struct intel_engine_cs *engine,
void *vaddr)
{
ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;
__execlists_update_reg_state(ce, engine, ce->ring->tail);
@ -3492,9 +3493,9 @@ __execlists_context_pin(struct intel_context *ce,
return 0;
}

static int execlists_context_pin(struct intel_context *ce)
static int execlists_context_pin(struct intel_context *ce, void *vaddr)
{
return __execlists_context_pin(ce, ce->engine);
return __execlists_context_pin(ce, ce->engine, vaddr);
}

static int execlists_context_alloc(struct intel_context *ce)
@ -3520,8 +3521,10 @@ static void execlists_context_reset(struct intel_context *ce)
static const struct intel_context_ops execlists_context_ops = {
.alloc = execlists_context_alloc,

.pre_pin = execlists_context_pre_pin,
.pin = execlists_context_pin,
.unpin = execlists_context_unpin,
.post_unpin = execlists_context_post_unpin,

.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
@ -3885,7 +3888,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
goto err;
}

err = i915_ggtt_pin(vma, 0, PIN_HIGH);
err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
if (err)
goto err;

@ -4126,7 +4129,7 @@ static int execlists_resume(struct intel_engine_cs *engine)
{
intel_mocs_init_engine(engine);

intel_engine_reset_breadcrumbs(engine);
intel_breadcrumbs_reset(engine->breadcrumbs);

if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
struct drm_printer p = drm_debug_printer(__func__);
@ -4757,14 +4760,21 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
intel_engine_mask_t aux_inv = 0;
u32 cmd, *cs;

cmd = 4;
if (mode & EMIT_INVALIDATE)
cmd += 2;
if (mode & EMIT_INVALIDATE)
aux_inv = request->engine->mask & ~BIT(BCS0);
if (aux_inv)
cmd += 2 * hweight8(aux_inv) + 2;

cs = intel_ring_begin(request,
4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0));
cs = intel_ring_begin(request, cmd);
if (IS_ERR(cs))
return PTR_ERR(cs);

if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(true);

cmd = MI_FLUSH_DW + 1;

/* We always require a command barrier so that subsequent
@ -4797,6 +4807,10 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
}
*cs++ = MI_NOOP;
}

if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(false);

intel_ring_advance(request, cs);

return 0;
@ -5295,6 +5309,14 @@ populate_lr_context(struct intel_context *ce,
return 0;
}

static struct intel_timeline *pinned_timeline(struct intel_context *ce)
{
struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

return intel_timeline_create_from_engine(ce->engine,
page_unmask_bits(tl));
}

static int __execlists_context_alloc(struct intel_context *ce,
struct intel_engine_cs *engine)
{
@ -5325,19 +5347,17 @@ static int __execlists_context_alloc(struct intel_context *ce,
goto error_deref_obj;
}

if (!ce->timeline) {
if (!page_mask_bits(ce->timeline)) {
struct intel_timeline *tl;
struct i915_vma *hwsp;

/*
* Use the static global HWSP for the kernel context, and
* a dynamically allocated cacheline for everyone else.
*/
hwsp = NULL;
if (unlikely(intel_context_is_barrier(ce)))
hwsp = engine->status_page.vma;

tl = intel_timeline_create(engine->gt, hwsp);
if (unlikely(ce->timeline))
tl = pinned_timeline(ce);
else
tl = intel_timeline_create(engine->gt);
if (IS_ERR(tl)) {
ret = PTR_ERR(tl);
goto error_deref_obj;
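
[Editor's illustrative aside, not part of this diff] The pinned_timeline() path above apparently keeps a small HWSP offset in the low bits of the ce->timeline pointer and separates the two halves with the page_mask_bits()/page_unmask_bits() pair. A generic sketch of that low-bit tagging idiom, assuming page-aligned storage (the helper names here are invented):

#include <assert.h>
#include <stdint.h>

#define LOW_BITS ((uintptr_t)4095)     /* page alignment leaves 12 spare bits */

void *tag_ptr(void *aligned, unsigned int offset)
{
        assert(((uintptr_t)aligned & LOW_BITS) == 0 && offset <= LOW_BITS);
        return (void *)((uintptr_t)aligned | offset);
}

void *ptr_part(void *tagged)           /* cf. page_mask_bits() */
{
        return (void *)((uintptr_t)tagged & ~LOW_BITS);
}

unsigned int offset_part(void *tagged) /* cf. page_unmask_bits() */
{
        return (uintptr_t)tagged & LOW_BITS;
}
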
@ -5443,12 +5463,12 @@ static int virtual_context_alloc(struct intel_context *ce)
return __execlists_context_alloc(ce, ve->siblings[0]);
}

static int virtual_context_pin(struct intel_context *ce)
static int virtual_context_pin(struct intel_context *ce, void *vaddr)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);

/* Note: we must use a real engine class for setting up reg state */
return __execlists_context_pin(ce, ve->siblings[0]);
return __execlists_context_pin(ce, ve->siblings[0], vaddr);
}

static void virtual_context_enter(struct intel_context *ce)
@ -5476,8 +5496,10 @@ static void virtual_context_exit(struct intel_context *ce)
static const struct intel_context_ops virtual_context_ops = {
.alloc = virtual_context_alloc,

.pre_pin = execlists_context_pre_pin,
.pin = virtual_context_pin,
.unpin = execlists_context_unpin,
.post_unpin = execlists_context_post_unpin,

.enter = virtual_context_enter,
.exit = virtual_context_exit,
@ -5711,9 +5733,7 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,
snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
intel_engine_init_breadcrumbs(&ve->base);
intel_engine_init_execlists(&ve->base);
ve->base.breadcrumbs.irq_armed = true; /* fake HW, used for irq_work */

ve->base.cops = &virtual_context_ops;
ve->base.request_alloc = execlists_request_alloc;
@ -5730,6 +5750,12 @@ intel_execlists_create_virtual(struct intel_engine_cs **siblings,

intel_context_init(&ve->context, &ve->base);

ve->base.breadcrumbs = intel_breadcrumbs_create(NULL);
if (!ve->base.breadcrumbs) {
err = -ENOMEM;
goto err_put;
}

for (n = 0; n < count; n++) {
struct intel_engine_cs *sibling = siblings[n];

@ -18,7 +18,8 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
if (unlikely(!pt))
return ERR_PTR(-ENOMEM);

if (unlikely(setup_page_dma(vm, &pt->base))) {
pt->base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
if (IS_ERR(pt->base)) {
kfree(pt);
return ERR_PTR(-ENOMEM);
}
@ -27,14 +28,20 @@ struct i915_page_table *alloc_pt(struct i915_address_space *vm)
return pt;
}

struct i915_page_directory *__alloc_pd(size_t sz)
struct i915_page_directory *__alloc_pd(int count)
{
struct i915_page_directory *pd;

pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
if (unlikely(!pd))
return NULL;

pd->entry = kcalloc(count, sizeof(*pd->entry), I915_GFP_ALLOW_FAIL);
if (unlikely(!pd->entry)) {
kfree(pd);
return NULL;
}

spin_lock_init(&pd->lock);
return pd;
}
@ -43,11 +50,13 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
struct i915_page_directory *pd;

pd = __alloc_pd(sizeof(*pd));
pd = __alloc_pd(I915_PDES);
if (unlikely(!pd))
return ERR_PTR(-ENOMEM);

if (unlikely(setup_page_dma(vm, px_base(pd)))) {
pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
if (IS_ERR(pd->pt.base)) {
kfree(pd->entry);
kfree(pd);
return ERR_PTR(-ENOMEM);
}
@ -55,41 +64,52 @@ struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
return pd;
}

void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl)
{
cleanup_page_dma(vm, pd);
kfree(pd);
BUILD_BUG_ON(offsetof(struct i915_page_directory, pt));

if (lvl) {
struct i915_page_directory *pd =
container_of(pt, typeof(*pd), pt);
kfree(pd->entry);
}

if (pt->base)
i915_gem_object_put(pt->base);

kfree(pt);
}

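[Editor's illustrative aside, not part of this diff] free_px() above relies on struct i915_page_directory embedding its struct i915_page_table as the first member (hence the BUILD_BUG_ON(offsetof(...)) check), so a single free path can take a page-table pointer and recover the directory when lvl > 0. A self-contained sketch of that layout trick (types and names are made up):

#include <assert.h>
#include <stddef.h>
#include <stdlib.h>

struct table { void *base; };
struct directory { struct table pt; void **entry; };  /* pt must be first */

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* One teardown routine for both levels, keyed by lvl, as in free_px(). */
void free_px_sketch(struct table *pt, int lvl)
{
        static_assert(offsetof(struct directory, pt) == 0, "pt must be first");

        if (lvl) {   /* non-leaf levels are really directories */
                struct directory *pd = container_of(pt, struct directory, pt);

                free(pd->entry);
        }
        free(pt);    /* same address as the directory, so one free suffices */
}
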
static inline void
write_dma_entry(struct i915_page_dma * const pdma,
write_dma_entry(struct drm_i915_gem_object * const pdma,
const unsigned short idx,
const u64 encoded_entry)
{
u64 * const vaddr = kmap_atomic(pdma->page);
u64 * const vaddr = kmap_atomic(__px_page(pdma));

vaddr[idx] = encoded_entry;
clflush_cache_range(&vaddr[idx], sizeof(u64));
kunmap_atomic(vaddr);
}

void
__set_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
struct i915_page_dma * const to,
struct i915_page_table * const to,
u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
{
/* Each thread pre-pins the pd, and we may have a thread per pde. */
GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry));
GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * I915_PDES);

atomic_inc(px_used(pd));
pd->entry[idx] = to;
write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
write_dma_entry(px_base(pd), idx, encode(px_dma(to), I915_CACHE_LLC));
}

void
clear_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
const struct i915_page_scratch * const scratch)
const struct drm_i915_gem_object * const scratch)
{
GEM_BUG_ON(atomic_read(px_used(pd)) == 0);

@ -102,7 +122,7 @@ bool
release_pd_entry(struct i915_page_directory * const pd,
const unsigned short idx,
struct i915_page_table * const pt,
const struct i915_page_scratch * const scratch)
const struct drm_i915_gem_object * const scratch)
{
bool free = false;

@ -155,19 +175,16 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
return ppgtt;
}

int ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
void ppgtt_bind_vma(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
u32 pte_flags;
int err;

if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
err = vm->allocate_va_range(vm, vma->node.start, vma->size);
if (err)
return err;

vm->allocate_va_range(vm, stash, vma->node.start, vma->size);
set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
}

@ -178,8 +195,6 @@ int ppgtt_bind_vma(struct i915_address_space *vm,

vm->insert_entries(vm, vma, cache_level, pte_flags);
wmb();

return 0;
}

void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
@ -188,12 +203,93 @@ void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
vm->clear_range(vm, vma->node.start, vma->size);
}

static unsigned long pd_count(u64 size, int shift)
{
/* Beware later misalignment */
return (size + 2 * (BIT_ULL(shift) - 1)) >> shift;
}

int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
u64 size)
{
unsigned long count;
int shift, n;

shift = vm->pd_shift;
if (!shift)
return 0;

count = pd_count(size, shift);
while (count--) {
struct i915_page_table *pt;

pt = alloc_pt(vm);
if (IS_ERR(pt)) {
i915_vm_free_pt_stash(vm, stash);
return PTR_ERR(pt);
}

pt->stash = stash->pt[0];
stash->pt[0] = pt;
}

for (n = 1; n < vm->top; n++) {
shift += ilog2(I915_PDES); /* Each PD holds 512 entries */
count = pd_count(size, shift);
while (count--) {
struct i915_page_directory *pd;

pd = alloc_pd(vm);
if (IS_ERR(pd)) {
i915_vm_free_pt_stash(vm, stash);
return PTR_ERR(pd);
}

pd->pt.stash = stash->pt[1];
stash->pt[1] = &pd->pt;
}
}

return 0;
}

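[Editor's illustrative aside, not part of this diff] i915_vm_alloc_pt_stash() above threads the preallocated page tables and directories onto two intrusive singly-linked lists through pt->stash. A sketch of the push side it uses, plus the matching pop a later consumer would need (the pop helper is hypothetical):

struct pt { struct pt *stash; };
struct pt_stash { struct pt *pt[2]; };   /* [0] tables, [1] directories */

/* Push: what the allocation loops above do. */
void stash_push(struct pt_stash *stash, int lvl, struct pt *pt)
{
        pt->stash = stash->pt[lvl];
        stash->pt[lvl] = pt;
}

/* Pop: how a later allocator would consume the preallocated nodes. */
struct pt *stash_pop(struct pt_stash *stash, int lvl)
{
        struct pt *pt = stash->pt[lvl];

        if (pt)
                stash->pt[lvl] = pt->stash;
        return pt;
}
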
int i915_vm_pin_pt_stash(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash)
{
struct i915_page_table *pt;
int n, err;

for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
for (pt = stash->pt[n]; pt; pt = pt->stash) {
err = pin_pt_dma(vm, pt->base);
if (err)
return err;
}
}

return 0;
}

void i915_vm_free_pt_stash(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash)
{
struct i915_page_table *pt;
int n;

for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
while ((pt = stash->pt[n])) {
stash->pt[n] = pt->stash;
free_px(vm, pt, n);
}
}
}

int ppgtt_set_pages(struct i915_vma *vma)
{
GEM_BUG_ON(vma->pages);

vma->pages = vma->obj->mm.pages;

vma->page_sizes = vma->obj->mm.page_sizes;

return 0;

@ -27,6 +27,7 @@

#include "i915_drv.h"
#include "intel_renderstate.h"
#include "gt/intel_context.h"
#include "intel_ring.h"

static const struct intel_renderstate_rodata *
@ -157,33 +158,47 @@ out:
#undef OUT_BATCH

int intel_renderstate_init(struct intel_renderstate *so,
struct intel_engine_cs *engine)
struct intel_context *ce)
{
struct drm_i915_gem_object *obj;
struct intel_engine_cs *engine = ce->engine;
struct drm_i915_gem_object *obj = NULL;
int err;

memset(so, 0, sizeof(*so));

so->rodata = render_state_get_rodata(engine);
if (!so->rodata)
if (so->rodata) {
if (so->rodata->batch_items * 4 > PAGE_SIZE)
return -EINVAL;

obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj))
return PTR_ERR(obj);

so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(so->vma)) {
err = PTR_ERR(so->vma);
goto err_obj;
}
}

i915_gem_ww_ctx_init(&so->ww, true);
retry:
err = intel_context_pin_ww(ce, &so->ww);
if (err)
goto err_fini;

/* return early if there's nothing to setup */
if (!err && !so->rodata)
return 0;

if (so->rodata->batch_items * 4 > PAGE_SIZE)
return -EINVAL;

obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj))
return PTR_ERR(obj);

so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(so->vma)) {
err = PTR_ERR(so->vma);
goto err_obj;
}
err = i915_gem_object_lock(so->vma->obj, &so->ww);
if (err)
goto err_context;

err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
goto err_obj;
goto err_context;

err = render_state_setup(so, engine->i915);
if (err)
@ -193,8 +208,18 @@ int intel_renderstate_init(struct intel_renderstate *so,

err_unpin:
i915_vma_unpin(so->vma);
err_context:
intel_context_unpin(ce);
err_fini:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&so->ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&so->ww);
err_obj:
i915_gem_object_put(obj);
if (obj)
i915_gem_object_put(obj);
so->vma = NULL;
return err;
}
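
[Editor's illustrative aside, not part of this diff] intel_renderstate_init() above adopts the WW locking shape seen throughout these hunks: initialise a ww context, take the object locks under it, and on -EDEADLK back off and retry from the top before finishing the context. A tiny self-contained toy of that control flow (the lock itself is faked; only the init/retry/backoff/fini ordering is the point):

#include <errno.h>
#include <stdio.h>

struct toy_ww { int backoffs; };

void toy_ww_init(struct toy_ww *ww) { ww->backoffs = 0; }
void toy_ww_fini(struct toy_ww *ww) { (void)ww; }
int toy_ww_backoff(struct toy_ww *ww) { ww->backoffs++; return 0; }

/* Pretend the lock conflicts once, then succeeds after a backoff. */
int toy_lock(struct toy_ww *ww) { return ww->backoffs ? 0 : -EDEADLK; }

int toy_init_flow(void)
{
        struct toy_ww ww;
        int err;

        toy_ww_init(&ww);
retry:
        err = toy_lock(&ww);            /* cf. i915_gem_object_lock(obj, &ww) */
        if (!err) {
                /* pin, map and emit would happen here */
        }
        if (err == -EDEADLK) {
                err = toy_ww_backoff(&ww); /* drop locks, wait, then reorder */
                if (!err)
                        goto retry;
        }
        toy_ww_fini(&ww);
        return err;
}

int main(void) { printf("err=%d\n", toy_init_flow()); return 0; }
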
@ -208,11 +233,9 @@ int intel_renderstate_emit(struct intel_renderstate *so,
if (!so->vma)
return 0;

i915_vma_lock(so->vma);
err = i915_request_await_object(rq, so->vma->obj, false);
if (err == 0)
err = i915_vma_move_to_active(so->vma, rq, 0);
i915_vma_unlock(so->vma);
if (err)
return err;

@ -233,7 +256,17 @@ int intel_renderstate_emit(struct intel_renderstate *so,
return 0;
}

void intel_renderstate_fini(struct intel_renderstate *so)
void intel_renderstate_fini(struct intel_renderstate *so,
struct intel_context *ce)
{
i915_vma_unpin_and_release(&so->vma, 0);
if (so->vma) {
i915_vma_unpin(so->vma);
i915_vma_close(so->vma);
}

intel_context_unpin(ce);
i915_gem_ww_ctx_fini(&so->ww);

if (so->vma)
i915_gem_object_put(so->vma->obj);
}

@ -25,9 +25,10 @@
|
||||
#define _INTEL_RENDERSTATE_H_
|
||||
|
||||
#include <linux/types.h>
|
||||
#include "i915_gem.h"
|
||||
|
||||
struct i915_request;
|
||||
struct intel_engine_cs;
|
||||
struct intel_context;
|
||||
struct i915_vma;
|
||||
|
||||
struct intel_renderstate_rodata {
|
||||
@ -49,6 +50,7 @@ extern const struct intel_renderstate_rodata gen8_null_state;
|
||||
extern const struct intel_renderstate_rodata gen9_null_state;
|
||||
|
||||
struct intel_renderstate {
|
||||
struct i915_gem_ww_ctx ww;
|
||||
const struct intel_renderstate_rodata *rodata;
|
||||
struct i915_vma *vma;
|
||||
u32 batch_offset;
|
||||
@ -58,9 +60,10 @@ struct intel_renderstate {
|
||||
};
|
||||
|
||||
int intel_renderstate_init(struct intel_renderstate *so,
|
||||
struct intel_engine_cs *engine);
|
||||
struct intel_context *ce);
|
||||
int intel_renderstate_emit(struct intel_renderstate *so,
|
||||
struct i915_request *rq);
|
||||
void intel_renderstate_fini(struct intel_renderstate *so);
|
||||
void intel_renderstate_fini(struct intel_renderstate *so,
|
||||
struct intel_context *ce);
|
||||
|
||||
#endif /* _INTEL_RENDERSTATE_H_ */
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "i915_drv.h"
|
||||
#include "i915_gpu_error.h"
|
||||
#include "i915_irq.h"
|
||||
#include "intel_breadcrumbs.h"
|
||||
#include "intel_engine_pm.h"
|
||||
#include "intel_gt.h"
|
||||
#include "intel_gt_pm.h"
|
||||
|
@ -21,7 +21,13 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
|
||||
return space;
|
||||
}
|
||||
|
||||
int intel_ring_pin(struct intel_ring *ring)
|
||||
void __intel_ring_pin(struct intel_ring *ring)
|
||||
{
|
||||
GEM_BUG_ON(!atomic_read(&ring->pin_count));
|
||||
atomic_inc(&ring->pin_count);
|
||||
}
|
||||
|
||||
int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww)
|
||||
{
|
||||
struct i915_vma *vma = ring->vma;
|
||||
unsigned int flags;
|
||||
@ -39,7 +45,7 @@ int intel_ring_pin(struct intel_ring *ring)
|
||||
else
|
||||
flags |= PIN_HIGH;
|
||||
|
||||
ret = i915_ggtt_pin(vma, 0, flags);
|
||||
ret = i915_ggtt_pin(vma, ww, 0, flags);
|
||||
if (unlikely(ret))
|
||||
goto err_unpin;
|
||||
|
||||
|
@ -21,7 +21,8 @@ int intel_ring_cacheline_align(struct i915_request *rq);
|
||||
|
||||
unsigned int intel_ring_update_space(struct intel_ring *ring);
|
||||
|
||||
int intel_ring_pin(struct intel_ring *ring);
|
||||
void __intel_ring_pin(struct intel_ring *ring);
|
||||
int intel_ring_pin(struct intel_ring *ring, struct i915_gem_ww_ctx *ww);
|
||||
void intel_ring_unpin(struct intel_ring *ring);
|
||||
void intel_ring_reset(struct intel_ring *ring, u32 tail);
|
||||
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "gen6_ppgtt.h"
|
||||
#include "gen7_renderclear.h"
|
||||
#include "i915_drv.h"
|
||||
#include "intel_breadcrumbs.h"
|
||||
#include "intel_context.h"
|
||||
#include "intel_gt.h"
|
||||
#include "intel_reset.h"
|
||||
@ -201,16 +202,18 @@ static struct i915_address_space *vm_alias(struct i915_address_space *vm)
|
||||
return vm;
|
||||
}
|
||||
|
||||
static u32 pp_dir(struct i915_address_space *vm)
|
||||
{
|
||||
return to_gen6_ppgtt(i915_vm_to_ppgtt(vm))->pp_dir;
|
||||
}
|
||||
|
||||
static void set_pp_dir(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct i915_address_space *vm = vm_alias(engine->gt->vm);
|
||||
|
||||
if (vm) {
|
||||
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
|
||||
|
||||
ENGINE_WRITE(engine, RING_PP_DIR_DCLV, PP_DIR_DCLV_2G);
|
||||
ENGINE_WRITE(engine, RING_PP_DIR_BASE,
|
||||
px_base(ppgtt->pd)->ggtt_offset << 10);
|
||||
ENGINE_WRITE(engine, RING_PP_DIR_BASE, pp_dir(vm));
|
||||
}
|
||||
}
|
||||
|
||||
@ -255,7 +258,7 @@ static int xcs_resume(struct intel_engine_cs *engine)
|
||||
else
|
||||
ring_setup_status_page(engine);
|
||||
|
||||
intel_engine_reset_breadcrumbs(engine);
|
||||
intel_breadcrumbs_reset(engine->breadcrumbs);
|
||||
|
||||
/* Enforce ordering by reading HEAD register back */
|
||||
ENGINE_POSTING_READ(engine, RING_HEAD);
|
||||
@ -474,14 +477,16 @@ static void ring_context_destroy(struct kref *ref)
|
||||
intel_context_free(ce);
|
||||
}
|
||||
|
||||
static int __context_pin_ppgtt(struct intel_context *ce)
|
||||
static int ring_context_pre_pin(struct intel_context *ce,
|
||||
struct i915_gem_ww_ctx *ww,
|
||||
void **unused)
|
||||
{
|
||||
struct i915_address_space *vm;
|
||||
int err = 0;
|
||||
|
||||
vm = vm_alias(ce->vm);
|
||||
if (vm)
|
||||
err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
|
||||
err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)), ww);
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -496,6 +501,10 @@ static void __context_unpin_ppgtt(struct intel_context *ce)
|
||||
}
|
||||
|
||||
static void ring_context_unpin(struct intel_context *ce)
|
||||
{
|
||||
}
|
||||
|
||||
static void ring_context_post_unpin(struct intel_context *ce)
|
||||
{
|
||||
__context_unpin_ppgtt(ce);
|
||||
}
|
||||
@ -584,9 +593,9 @@ static int ring_context_alloc(struct intel_context *ce)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ring_context_pin(struct intel_context *ce)
|
||||
static int ring_context_pin(struct intel_context *ce, void *unused)
|
||||
{
|
||||
return __context_pin_ppgtt(ce);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ring_context_reset(struct intel_context *ce)
|
||||
@ -597,8 +606,10 @@ static void ring_context_reset(struct intel_context *ce)
|
||||
static const struct intel_context_ops ring_context_ops = {
|
||||
.alloc = ring_context_alloc,
|
||||
|
||||
.pre_pin = ring_context_pre_pin,
|
||||
.pin = ring_context_pin,
|
||||
.unpin = ring_context_unpin,
|
||||
.post_unpin = ring_context_post_unpin,
|
||||
|
||||
.enter = intel_context_enter_engine,
|
||||
.exit = intel_context_exit_engine,
|
||||
@ -608,7 +619,7 @@ static const struct intel_context_ops ring_context_ops = {
|
||||
};
|
||||
|
||||
static int load_pd_dir(struct i915_request *rq,
|
||||
const struct i915_ppgtt *ppgtt,
|
||||
struct i915_address_space *vm,
|
||||
u32 valid)
|
||||
{
|
||||
const struct intel_engine_cs * const engine = rq->engine;
|
||||
@ -624,7 +635,7 @@ static int load_pd_dir(struct i915_request *rq,
|
||||
|
||||
*cs++ = MI_LOAD_REGISTER_IMM(1);
|
||||
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
|
||||
*cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
|
||||
*cs++ = pp_dir(vm);
|
||||
|
||||
/* Stall until the page table load is complete? */
|
||||
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
|
||||
@ -826,7 +837,7 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
|
||||
* post-sync op, this extra pass appears vital before a
|
||||
* mm switch!
|
||||
*/
|
||||
ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
|
||||
ret = load_pd_dir(rq, vm, PP_DIR_DCLV_2G);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -1250,14 +1261,15 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
|
||||
timeline = intel_timeline_create_from_engine(engine,
|
||||
I915_GEM_HWS_SEQNO_ADDR);
|
||||
if (IS_ERR(timeline)) {
|
||||
err = PTR_ERR(timeline);
|
||||
goto err;
|
||||
}
|
||||
GEM_BUG_ON(timeline->has_initial_breadcrumb);
|
||||
|
||||
err = intel_timeline_pin(timeline);
|
||||
err = intel_timeline_pin(timeline, NULL);
|
||||
if (err)
|
||||
goto err_timeline;
|
||||
|
||||
@ -1267,7 +1279,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
|
||||
goto err_timeline_unpin;
|
||||
}
|
||||
|
||||
err = intel_ring_pin(ring);
|
||||
err = intel_ring_pin(ring, NULL);
|
||||
if (err)
|
||||
goto err_ring;
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <drm/i915_drm.h>
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "intel_breadcrumbs.h"
|
||||
#include "intel_gt.h"
|
||||
#include "intel_gt_clock_utils.h"
|
||||
#include "intel_gt_irq.h"
|
||||
|
@ -215,7 +215,8 @@ static void cacheline_free(struct intel_timeline_cacheline *cl)
|
||||
|
||||
static int intel_timeline_init(struct intel_timeline *timeline,
|
||||
struct intel_gt *gt,
|
||||
struct i915_vma *hwsp)
|
||||
struct i915_vma *hwsp,
|
||||
unsigned int offset)
|
||||
{
|
||||
void *vaddr;
|
||||
|
||||
@ -246,8 +247,7 @@ static int intel_timeline_init(struct intel_timeline *timeline,
|
||||
|
||||
vaddr = page_mask_bits(cl->vaddr);
|
||||
} else {
|
||||
timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
|
||||
|
||||
timeline->hwsp_offset = offset;
|
||||
vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
|
||||
if (IS_ERR(vaddr))
|
||||
return PTR_ERR(vaddr);
|
||||
@ -297,7 +297,9 @@ static void intel_timeline_fini(struct intel_timeline *timeline)
|
||||
}
|
||||
|
||||
struct intel_timeline *
|
||||
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
|
||||
__intel_timeline_create(struct intel_gt *gt,
|
||||
struct i915_vma *global_hwsp,
|
||||
unsigned int offset)
|
||||
{
|
||||
struct intel_timeline *timeline;
|
||||
int err;
|
||||
@ -306,7 +308,7 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
|
||||
if (!timeline)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
err = intel_timeline_init(timeline, gt, global_hwsp);
|
||||
err = intel_timeline_init(timeline, gt, global_hwsp, offset);
|
||||
if (err) {
|
||||
kfree(timeline);
|
||||
return ERR_PTR(err);
|
||||
@ -315,14 +317,20 @@ intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
|
||||
return timeline;
|
||||
}
|
||||
|
||||
int intel_timeline_pin(struct intel_timeline *tl)
|
||||
void __intel_timeline_pin(struct intel_timeline *tl)
|
||||
{
|
||||
GEM_BUG_ON(!atomic_read(&tl->pin_count));
|
||||
atomic_inc(&tl->pin_count);
|
||||
}
|
||||
|
||||
int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (atomic_add_unless(&tl->pin_count, 1, 0))
|
||||
return 0;
|
||||
|
||||
err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
|
||||
err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -465,7 +473,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
|
||||
goto err_rollback;
|
||||
}
|
||||
|
||||
err = i915_ggtt_pin(vma, 0, PIN_HIGH);
|
||||
err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH);
|
||||
if (err) {
|
||||
__idle_hwsp_free(vma->private, cacheline);
|
||||
goto err_rollback;
|
||||
@ -484,7 +492,9 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
|
||||
* free it after the current request is retired, which ensures that
|
||||
* all writes into the cacheline from previous requests are complete.
|
||||
*/
|
||||
err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
|
||||
err = i915_active_ref(&tl->hwsp_cacheline->active,
|
||||
tl->fence_context,
|
||||
&rq->fence);
|
||||
if (err)
|
||||
goto err_cacheline;
|
||||
|
||||
|
@ -29,10 +29,27 @@
|
||||
|
||||
#include "i915_active.h"
|
||||
#include "i915_syncmap.h"
|
||||
#include "gt/intel_timeline_types.h"
|
||||
#include "intel_timeline_types.h"
|
||||
|
||||
struct intel_timeline *
|
||||
intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
|
||||
__intel_timeline_create(struct intel_gt *gt,
|
||||
struct i915_vma *global_hwsp,
|
||||
unsigned int offset);
|
||||
|
||||
static inline struct intel_timeline *
|
||||
intel_timeline_create(struct intel_gt *gt)
|
||||
{
|
||||
return __intel_timeline_create(gt, NULL, 0);
|
||||
}
|
||||
|
||||
static inline struct intel_timeline *
|
||||
intel_timeline_create_from_engine(struct intel_engine_cs *engine,
|
||||
unsigned int offset)
|
||||
{
|
||||
return __intel_timeline_create(engine->gt,
|
||||
engine->status_page.vma,
|
||||
offset);
|
||||
}
|
||||
|
||||
static inline struct intel_timeline *
|
||||
intel_timeline_get(struct intel_timeline *timeline)
|
||||
@ -71,7 +88,8 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
|
||||
return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
|
||||
}
|
||||
|
||||
int intel_timeline_pin(struct intel_timeline *tl);
|
||||
void __intel_timeline_pin(struct intel_timeline *tl);
|
||||
int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww);
|
||||
void intel_timeline_enter(struct intel_timeline *tl);
|
||||
int intel_timeline_get_seqno(struct intel_timeline *tl,
|
||||
struct i915_request *rq,
|
||||
|
@ -2088,6 +2088,7 @@ static int engine_wa_list_verify(struct intel_context *ce,
|
||||
const struct i915_wa *wa;
|
||||
struct i915_request *rq;
|
||||
struct i915_vma *vma;
|
||||
struct i915_gem_ww_ctx ww;
|
||||
unsigned int i;
|
||||
u32 *results;
|
||||
int err;
|
||||
@ -2100,29 +2101,34 @@ static int engine_wa_list_verify(struct intel_context *ce,
|
||||
return PTR_ERR(vma);
|
||||
|
||||
intel_engine_pm_get(ce->engine);
|
||||
rq = intel_context_create_request(ce);
|
||||
intel_engine_pm_put(ce->engine);
|
||||
i915_gem_ww_ctx_init(&ww, false);
|
||||
retry:
|
||||
err = i915_gem_object_lock(vma->obj, &ww);
|
||||
if (err == 0)
|
||||
err = intel_context_pin_ww(ce, &ww);
|
||||
if (err)
|
||||
goto err_pm;
|
||||
|
||||
rq = i915_request_create(ce);
|
||||
if (IS_ERR(rq)) {
|
||||
err = PTR_ERR(rq);
|
||||
goto err_vma;
|
||||
goto err_unpin;
|
||||
}
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_request_await_object(rq, vma->obj, true);
|
||||
if (err == 0)
|
||||
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(vma);
|
||||
if (err) {
|
||||
i915_request_add(rq);
|
||||
goto err_vma;
|
||||
}
|
||||
|
||||
err = wa_list_srm(rq, wal, vma);
|
||||
if (err)
|
||||
goto err_vma;
|
||||
if (err == 0)
|
||||
err = wa_list_srm(rq, wal, vma);
|
||||
|
||||
i915_request_get(rq);
|
||||
if (err)
|
||||
i915_request_set_error_once(rq, err);
|
||||
i915_request_add(rq);
|
||||
|
||||
if (err)
|
||||
goto err_rq;
|
||||
|
||||
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
|
||||
err = -ETIME;
|
||||
goto err_rq;
|
||||
@ -2147,7 +2153,16 @@ static int engine_wa_list_verify(struct intel_context *ce,
|
||||
|
||||
err_rq:
|
||||
i915_request_put(rq);
|
||||
err_vma:
|
||||
err_unpin:
|
||||
intel_context_unpin(ce);
|
||||
err_pm:
|
||||
if (err == -EDEADLK) {
|
||||
err = i915_gem_ww_ctx_backoff(&ww);
|
||||
if (!err)
|
||||
goto retry;
|
||||
}
|
||||
i915_gem_ww_ctx_fini(&ww);
|
||||
intel_engine_pm_put(ce->engine);
|
||||
i915_vma_unpin(vma);
|
||||
i915_vma_put(vma);
|
||||
return err;
|
||||
|
@ -131,6 +131,10 @@ static void mock_context_unpin(struct intel_context *ce)
|
||||
{
|
||||
}
|
||||
|
||||
static void mock_context_post_unpin(struct intel_context *ce)
|
||||
{
|
||||
}
|
||||
|
||||
static void mock_context_destroy(struct kref *ref)
|
||||
{
|
||||
struct intel_context *ce = container_of(ref, typeof(*ce), ref);
|
||||
@ -152,8 +156,7 @@ static int mock_context_alloc(struct intel_context *ce)
|
||||
if (!ce->ring)
|
||||
return -ENOMEM;
|
||||
|
||||
GEM_BUG_ON(ce->timeline);
|
||||
ce->timeline = intel_timeline_create(ce->engine->gt, NULL);
|
||||
ce->timeline = intel_timeline_create(ce->engine->gt);
|
||||
if (IS_ERR(ce->timeline)) {
|
||||
kfree(ce->engine);
|
||||
return PTR_ERR(ce->timeline);
|
||||
@ -164,7 +167,13 @@ static int mock_context_alloc(struct intel_context *ce)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mock_context_pin(struct intel_context *ce)
|
||||
static int mock_context_pre_pin(struct intel_context *ce,
|
||||
struct i915_gem_ww_ctx *ww, void **unused)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mock_context_pin(struct intel_context *ce, void *unused)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -176,8 +185,10 @@ static void mock_context_reset(struct intel_context *ce)
|
||||
static const struct intel_context_ops mock_context_ops = {
|
||||
.alloc = mock_context_alloc,
|
||||
|
||||
.pre_pin = mock_context_pre_pin,
|
||||
.pin = mock_context_pin,
|
||||
.unpin = mock_context_unpin,
|
||||
.post_unpin = mock_context_post_unpin,
|
||||
|
||||
.enter = intel_context_enter_engine,
|
||||
.exit = intel_context_exit_engine,
|
||||
@ -261,11 +272,12 @@ static void mock_engine_release(struct intel_engine_cs *engine)
|
||||
|
||||
GEM_BUG_ON(timer_pending(&mock->hw_delay));
|
||||
|
||||
intel_breadcrumbs_free(engine->breadcrumbs);
|
||||
|
||||
intel_context_unpin(engine->kernel_context);
|
||||
intel_context_put(engine->kernel_context);
|
||||
|
||||
intel_engine_fini_retire(engine);
|
||||
intel_engine_fini_breadcrumbs(engine);
|
||||
}
|
||||
|
||||
struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
|
||||
@ -323,20 +335,26 @@ int mock_engine_init(struct intel_engine_cs *engine)
|
||||
struct intel_context *ce;
|
||||
|
||||
intel_engine_init_active(engine, ENGINE_MOCK);
|
||||
intel_engine_init_breadcrumbs(engine);
|
||||
intel_engine_init_execlists(engine);
|
||||
intel_engine_init__pm(engine);
|
||||
intel_engine_init_retire(engine);
|
||||
|
||||
engine->breadcrumbs = intel_breadcrumbs_create(NULL);
|
||||
if (!engine->breadcrumbs)
|
||||
return -ENOMEM;
|
||||
|
||||
ce = create_kernel_context(engine);
|
||||
if (IS_ERR(ce))
|
||||
goto err_breadcrumbs;
|
||||
|
||||
/* We insist the kernel context is using the status_page */
|
||||
engine->status_page.vma = ce->timeline->hwsp_ggtt;
|
||||
|
||||
engine->kernel_context = ce;
|
||||
return 0;
|
||||
|
||||
err_breadcrumbs:
|
||||
intel_engine_fini_breadcrumbs(engine);
|
||||
intel_breadcrumbs_free(engine->breadcrumbs);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
|
@ -68,6 +68,8 @@ static int context_sync(struct intel_context *ce)
|
||||
} while (!err);
|
||||
mutex_unlock(&tl->mutex);
|
||||
|
||||
/* Wait for all barriers to complete (remote CPU) before we check */
|
||||
i915_active_unlock_wait(&ce->active);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -2729,7 +2729,7 @@ static int create_gang(struct intel_engine_cs *engine,
|
||||
i915_gem_object_put(obj);
|
||||
intel_context_put(ce);
|
||||
|
||||
rq->client_link.next = &(*prev)->client_link;
|
||||
rq->mock.link.next = &(*prev)->mock.link;
|
||||
*prev = rq;
|
||||
return 0;
|
||||
|
||||
@ -2970,8 +2970,7 @@ static int live_preempt_gang(void *arg)
|
||||
}
|
||||
|
||||
while (rq) { /* wait for each rq from highest to lowest prio */
|
||||
struct i915_request *n =
|
||||
list_next_entry(rq, client_link);
|
||||
struct i915_request *n = list_next_entry(rq, mock.link);
|
||||
|
||||
if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
|
||||
struct drm_printer p =
|
||||
@ -3090,7 +3089,7 @@ static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
|
||||
return vma;
|
||||
}
|
||||
|
||||
err = i915_ggtt_pin(vma, 0, 0);
|
||||
err = i915_ggtt_pin(vma, NULL, 0, 0);
|
||||
if (err) {
|
||||
i915_vma_put(vma);
|
||||
return ERR_PTR(err);
|
||||
@ -4997,6 +4996,7 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
|
||||
{
|
||||
struct intel_context *ce;
|
||||
struct i915_request *rq;
|
||||
struct i915_gem_ww_ctx ww;
|
||||
enum {
|
||||
RING_START_IDX = 0,
|
||||
RING_TAIL_IDX,
|
||||
@ -5011,7 +5011,11 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
|
||||
if (IS_ERR(ce))
|
||||
return PTR_ERR(ce);
|
||||
|
||||
err = intel_context_pin(ce);
|
||||
i915_gem_ww_ctx_init(&ww, false);
|
||||
retry:
|
||||
err = i915_gem_object_lock(scratch->obj, &ww);
|
||||
if (!err)
|
||||
err = intel_context_pin_ww(ce, &ww);
|
||||
if (err)
|
||||
goto err_put;
|
||||
|
||||
@ -5040,11 +5044,9 @@ static int __live_lrc_state(struct intel_engine_cs *engine,
|
||||
*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
|
||||
*cs++ = 0;
|
||||
|
||||
i915_vma_lock(scratch);
|
||||
err = i915_request_await_object(rq, scratch->obj, true);
|
||||
if (!err)
|
||||
err = i915_vma_move_to_active(scratch, rq, EXEC_OBJECT_WRITE);
|
||||
i915_vma_unlock(scratch);
|
||||
|
||||
i915_request_get(rq);
|
||||
i915_request_add(rq);
|
||||
@ -5081,6 +5083,12 @@ err_rq:
|
||||
err_unpin:
|
||||
intel_context_unpin(ce);
|
||||
err_put:
|
||||
if (err == -EDEADLK) {
|
||||
err = i915_gem_ww_ctx_backoff(&ww);
|
||||
if (!err)
|
||||
goto retry;
|
||||
}
|
||||
i915_gem_ww_ctx_fini(&ww);
|
||||
intel_context_put(ce);
|
||||
return err;
|
||||
}
|
||||
|
@ -77,20 +77,20 @@ create_spin_counter(struct intel_engine_cs *engine,
|
||||
|
||||
vma = i915_vma_instance(obj, vm, NULL);
|
||||
if (IS_ERR(vma)) {
|
||||
i915_gem_object_put(obj);
|
||||
return vma;
|
||||
err = PTR_ERR(vma);
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
err = i915_vma_pin(vma, 0, 0, PIN_USER);
|
||||
if (err) {
|
||||
i915_vma_put(vma);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
if (err)
|
||||
goto err_unlock;
|
||||
|
||||
i915_vma_lock(vma);
|
||||
|
||||
base = i915_gem_object_pin_map(obj, I915_MAP_WC);
|
||||
if (IS_ERR(base)) {
|
||||
i915_gem_object_put(obj);
|
||||
return ERR_CAST(base);
|
||||
err = PTR_ERR(base);
|
||||
goto err_unpin;
|
||||
}
|
||||
cs = base;
|
||||
|
||||
@ -134,6 +134,14 @@ create_spin_counter(struct intel_engine_cs *engine,
|
||||
*cancel = base + loop;
|
||||
*counter = srm ? memset32(base + end, 0, 1) : NULL;
|
||||
return vma;
|
||||
|
||||
err_unpin:
|
||||
i915_vma_unpin(vma);
|
||||
err_unlock:
|
||||
i915_vma_unlock(vma);
|
||||
err_put:
|
||||
i915_gem_object_put(obj);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
|
||||
@ -639,7 +647,6 @@ int live_rps_frequency_cs(void *arg)
|
||||
goto err_vma;
|
||||
}
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_request_await_object(rq, vma->obj, false);
|
||||
if (!err)
|
||||
err = i915_vma_move_to_active(vma, rq, 0);
|
||||
@ -647,7 +654,6 @@ int live_rps_frequency_cs(void *arg)
|
||||
err = rq->engine->emit_bb_start(rq,
|
||||
vma->node.start,
|
||||
PAGE_SIZE, 0);
|
||||
i915_vma_unlock(vma);
|
||||
i915_request_add(rq);
|
||||
if (err)
|
||||
goto err_vma;
|
||||
@ -708,6 +714,7 @@ err_vma:
|
||||
i915_gem_object_flush_map(vma->obj);
|
||||
i915_gem_object_unpin_map(vma->obj);
|
||||
i915_vma_unpin(vma);
|
||||
i915_vma_unlock(vma);
|
||||
i915_vma_put(vma);
|
||||
|
||||
st_engine_heartbeat_enable(engine);
|
||||
@ -781,7 +788,6 @@ int live_rps_frequency_srm(void *arg)
|
||||
goto err_vma;
|
||||
}
|
||||
|
||||
i915_vma_lock(vma);
|
||||
err = i915_request_await_object(rq, vma->obj, false);
|
||||
if (!err)
|
||||
err = i915_vma_move_to_active(vma, rq, 0);
|
||||
@ -789,7 +795,6 @@ int live_rps_frequency_srm(void *arg)
|
||||
err = rq->engine->emit_bb_start(rq,
|
||||
vma->node.start,
|
||||
PAGE_SIZE, 0);
|
||||
i915_vma_unlock(vma);
|
||||
i915_request_add(rq);
|
||||
if (err)
|
||||
goto err_vma;
|
||||
@ -849,6 +854,7 @@ err_vma:
|
||||
i915_gem_object_flush_map(vma->obj);
|
||||
i915_gem_object_unpin_map(vma->obj);
|
||||
i915_vma_unpin(vma);
|
||||
i915_vma_unlock(vma);
|
||||
i915_vma_put(vma);
|
||||
|
||||
st_engine_heartbeat_enable(engine);
|
||||
|
@ -72,7 +72,7 @@ static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
|
||||
unsigned long cacheline;
|
||||
int err;
|
||||
|
||||
tl = intel_timeline_create(state->gt, NULL);
|
||||
tl = intel_timeline_create(state->gt);
|
||||
if (IS_ERR(tl))
|
||||
return PTR_ERR(tl);
|
||||
|
||||
@ -455,7 +455,7 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
|
||||
struct i915_request *rq;
|
||||
int err;
|
||||
|
||||
err = intel_timeline_pin(tl);
|
||||
err = intel_timeline_pin(tl, NULL);
|
||||
if (err) {
|
||||
rq = ERR_PTR(err);
|
||||
goto out;
|
||||
@ -487,7 +487,7 @@ checked_intel_timeline_create(struct intel_gt *gt)
|
||||
{
|
||||
struct intel_timeline *tl;
|
||||
|
||||
tl = intel_timeline_create(gt, NULL);
|
||||
tl = intel_timeline_create(gt);
|
||||
if (IS_ERR(tl))
|
||||
return tl;
|
||||
|
||||
@ -660,14 +660,14 @@ static int live_hwsp_wrap(void *arg)
|
||||
* foreign GPU references.
|
||||
*/
|
||||
|
||||
tl = intel_timeline_create(gt, NULL);
|
||||
tl = intel_timeline_create(gt);
|
||||
if (IS_ERR(tl))
|
||||
return PTR_ERR(tl);
|
||||
|
||||
if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
|
||||
goto out_free;
|
||||
|
||||
err = intel_timeline_pin(tl);
|
||||
err = intel_timeline_pin(tl, NULL);
|
||||
if (err)
|
||||
goto out_free;
|
||||
|
||||
|
@ -214,7 +214,7 @@ static int check_whitelist(struct i915_gem_context *ctx,
|
||||
return PTR_ERR(results);
|
||||
|
||||
err = 0;
|
||||
i915_gem_object_lock(results);
|
||||
i915_gem_object_lock(results, NULL);
|
||||
intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
|
||||
err = i915_gem_object_set_to_cpu_domain(results, false);
|
||||
i915_gem_object_unlock(results);
|
||||
|
@ -677,7 +677,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
|
||||
goto err;
|
||||
|
||||
flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
|
||||
ret = i915_ggtt_pin(vma, 0, flags);
|
||||
ret = i915_ggtt_pin(vma, NULL, 0, flags);
|
||||
if (ret) {
|
||||
vma = ERR_PTR(ret);
|
||||
goto err;
|
||||
|
@ -1923,6 +1923,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
|
||||
if (ret)
|
||||
goto err_unmap;
|
||||
|
||||
i915_gem_object_unlock(bb->obj);
|
||||
INIT_LIST_HEAD(&bb->list);
|
||||
list_add(&bb->list, &s->workload->shadow_bb);
|
||||
|
||||
@ -2982,7 +2983,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
|
||||
goto put_obj;
|
||||
}
|
||||
|
||||
i915_gem_object_lock(obj);
|
||||
i915_gem_object_lock(obj, NULL);
|
||||
ret = i915_gem_object_set_to_cpu_domain(obj, false);
|
||||
i915_gem_object_unlock(obj);
|
||||
if (ret) {
|
||||
|
@ -403,6 +403,14 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
|
||||
wa_ctx->indirect_ctx.shadow_va = NULL;
|
||||
}
|
||||
|
||||
static void set_dma_address(struct i915_page_directory *pd, dma_addr_t addr)
|
||||
{
|
||||
struct scatterlist *sg = pd->pt.base->mm.pages->sgl;
|
||||
|
||||
/* This is not a good idea */
|
||||
sg->dma_address = addr;
|
||||
}
|
||||
|
||||
static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
|
||||
struct intel_context *ce)
|
||||
{
|
||||
@ -411,7 +419,7 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
|
||||
int i = 0;
|
||||
|
||||
if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
|
||||
px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0];
|
||||
set_dma_address(ppgtt->pd, mm->ppgtt_mm.shadow_pdps[0]);
|
||||
} else {
|
||||
for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
|
||||
struct i915_page_directory * const pd =
|
||||
@ -421,7 +429,8 @@ static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
|
||||
shadow ppgtt. */
|
||||
if (!pd)
|
||||
break;
|
||||
px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
|
||||
|
||||
set_dma_address(pd, mm->ppgtt_mm.shadow_pdps[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1240,13 +1249,13 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s,
|
||||
int i;
|
||||
|
||||
if (i915_vm_is_4lvl(&ppgtt->vm)) {
|
||||
px_dma(ppgtt->pd) = s->i915_context_pml4;
|
||||
set_dma_address(ppgtt->pd, s->i915_context_pml4);
|
||||
} else {
|
||||
for (i = 0; i < GEN8_3LVL_PDPES; i++) {
|
||||
struct i915_page_directory * const pd =
|
||||
i915_pd_entry(ppgtt->pd, i);
|
||||
|
||||
px_dma(pd) = s->i915_context_pdps[i];
|
||||
set_dma_address(pd, s->i915_context_pdps[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -28,12 +28,14 @@ static struct i915_global_active {
|
||||
} global;
|
||||
|
||||
struct active_node {
|
||||
struct rb_node node;
|
||||
struct i915_active_fence base;
|
||||
struct i915_active *ref;
|
||||
struct rb_node node;
|
||||
u64 timeline;
|
||||
};
|
||||
|
||||
#define fetch_node(x) rb_entry(READ_ONCE(x), typeof(struct active_node), node)
|
||||
|
||||
static inline struct active_node *
|
||||
node_from_active(struct i915_active_fence *active)
|
||||
{
|
||||
@ -128,8 +130,8 @@ static inline void debug_active_assert(struct i915_active *ref) { }
|
||||
static void
|
||||
__active_retire(struct i915_active *ref)
|
||||
{
|
||||
struct rb_root root = RB_ROOT;
|
||||
struct active_node *it, *n;
|
||||
struct rb_root root;
|
||||
unsigned long flags;
|
||||
|
||||
GEM_BUG_ON(i915_active_is_idle(ref));
|
||||
@ -141,9 +143,25 @@ __active_retire(struct i915_active *ref)
|
||||
GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
|
||||
debug_active_deactivate(ref);
|
||||
|
||||
root = ref->tree;
|
||||
ref->tree = RB_ROOT;
|
||||
ref->cache = NULL;
|
||||
/* Even if we have not used the cache, we may still have a barrier */
|
||||
if (!ref->cache)
|
||||
ref->cache = fetch_node(ref->tree.rb_node);
|
||||
|
||||
/* Keep the MRU cached node for reuse */
|
||||
if (ref->cache) {
|
||||
/* Discard all other nodes in the tree */
|
||||
rb_erase(&ref->cache->node, &ref->tree);
|
||||
root = ref->tree;
|
||||
|
||||
/* Rebuild the tree with only the cached node */
|
||||
rb_link_node(&ref->cache->node, NULL, &ref->tree.rb_node);
|
||||
rb_insert_color(&ref->cache->node, &ref->tree);
|
||||
GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
|
||||
|
||||
/* Make the cached node available for reuse with any timeline */
|
||||
if (IS_ENABLED(CONFIG_64BIT))
|
||||
ref->cache->timeline = 0; /* needs cmpxchg(u64) */
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&ref->tree_lock, flags);
|
||||
|
||||
@ -154,6 +172,7 @@ __active_retire(struct i915_active *ref)
|
||||
/* ... except if you wait on it, you must manage your own references! */
|
||||
wake_up_var(ref);
|
||||
|
||||
/* Finally free the discarded timeline tree */
|
||||
rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
|
||||
GEM_BUG_ON(i915_active_fence_isset(&it->base));
|
||||
kmem_cache_free(global.slab_cache, it);
|
||||
@ -216,12 +235,11 @@ excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
|
||||
active_retire(container_of(cb, struct i915_active, excl.cb));
|
||||
}
|
||||
|
||||
static struct i915_active_fence *
|
||||
active_instance(struct i915_active *ref, struct intel_timeline *tl)
|
||||
static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
|
||||
{
|
||||
struct active_node *node, *prealloc;
|
||||
struct rb_node **p, *parent;
|
||||
u64 idx = tl->fence_context;
|
||||
struct active_node *it;
|
||||
|
||||
GEM_BUG_ON(idx == 0); /* 0 is the unordered timeline, rsvd for cache */
|
||||
|
||||
/*
|
||||
* We track the most recently used timeline to skip a rbtree search
|
||||
@ -230,8 +248,59 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
|
||||
* after the previous activity has been retired, or if it matches the
|
||||
* current timeline.
|
||||
*/
|
||||
node = READ_ONCE(ref->cache);
|
||||
if (node && node->timeline == idx)
|
||||
it = READ_ONCE(ref->cache);
|
||||
if (it) {
|
||||
u64 cached = READ_ONCE(it->timeline);
|
||||
|
||||
/* Once claimed, this slot will only belong to this idx */
|
||||
if (cached == idx)
|
||||
return it;
|
||||
|
||||
#ifdef CONFIG_64BIT /* for cmpxchg(u64) */
|
||||
/*
|
||||
* An unclaimed cache [.timeline=0] can only be claimed once.
|
||||
*
|
||||
* If the value is already non-zero, some other thread has
|
||||
* claimed the cache and we know that is does not match our
|
||||
* idx. If, and only if, the timeline is currently zero is it
|
||||
* worth competing to claim it atomically for ourselves (for
|
||||
* only the winner of that race will cmpxchg return the old
|
||||
* value of 0).
|
||||
*/
|
||||
if (!cached && !cmpxchg(&it->timeline, 0, idx))
|
||||
return it;
|
||||
#endif
|
||||
}
|
||||
|
||||
BUILD_BUG_ON(offsetof(typeof(*it), node));
|
||||
|
||||
/* While active, the tree can only be built; not destroyed */
|
||||
GEM_BUG_ON(i915_active_is_idle(ref));
|
||||
|
||||
it = fetch_node(ref->tree.rb_node);
|
||||
while (it) {
|
||||
if (it->timeline < idx) {
|
||||
it = fetch_node(it->node.rb_right);
|
||||
} else if (it->timeline > idx) {
|
||||
it = fetch_node(it->node.rb_left);
|
||||
} else {
|
||||
WRITE_ONCE(ref->cache, it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* NB: If the tree rotated beneath us, we may miss our target. */
|
||||
return it;
|
||||
}
|
||||
|
||||
static struct i915_active_fence *
|
||||
active_instance(struct i915_active *ref, u64 idx)
|
||||
{
|
||||
struct active_node *node, *prealloc;
|
||||
struct rb_node **p, *parent;
|
||||
|
||||
node = __active_lookup(ref, idx);
|
||||
if (likely(node))
|
||||
return &node->base;
|
||||
|
||||
/* Preallocate a replacement, just in case */
|
||||
@ -268,10 +337,9 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
|
||||
rb_insert_color(&node->node, &ref->tree);
|
||||
|
||||
out:
|
||||
ref->cache = node;
|
||||
WRITE_ONCE(ref->cache, node);
|
||||
spin_unlock_irq(&ref->tree_lock);
|
||||
|
||||
BUILD_BUG_ON(offsetof(typeof(*node), base));
|
||||
return &node->base;
|
||||
}
|
||||
|
||||
@ -353,69 +421,116 @@ __active_del_barrier(struct i915_active *ref, struct active_node *node)
|
||||
return ____active_del_barrier(ref, node, barrier_to_engine(node));
|
||||
}
|
||||
|
||||
int i915_active_ref(struct i915_active *ref,
|
||||
struct intel_timeline *tl,
|
||||
struct dma_fence *fence)
|
||||
static bool
|
||||
replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
|
||||
{
|
||||
if (!is_barrier(active)) /* proto-node used by our idle barrier? */
|
||||
return false;
|
||||
|
||||
/*
|
||||
* This request is on the kernel_context timeline, and so
|
||||
* we can use it to substitute for the pending idle-barrer
|
||||
* request that we want to emit on the kernel_context.
|
||||
*/
|
||||
__active_del_barrier(ref, node_from_active(active));
|
||||
return true;
|
||||
}
|
||||
|
||||
int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
|
||||
{
|
||||
struct i915_active_fence *active;
|
||||
int err;
|
||||
|
||||
lockdep_assert_held(&tl->mutex);
|
||||
|
||||
/* Prevent reaping in case we malloc/wait while building the tree */
|
||||
err = i915_active_acquire(ref);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
active = active_instance(ref, tl);
|
||||
active = active_instance(ref, idx);
|
||||
if (!active) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (is_barrier(active)) { /* proto-node used by our idle barrier */
|
||||
/*
|
||||
* This request is on the kernel_context timeline, and so
|
||||
* we can use it to substitute for the pending idle-barrer
|
||||
* request that we want to emit on the kernel_context.
|
||||
*/
|
||||
__active_del_barrier(ref, node_from_active(active));
|
||||
if (replace_barrier(ref, active)) {
|
||||
RCU_INIT_POINTER(active->fence, NULL);
|
||||
atomic_dec(&ref->count);
|
||||
}
|
||||
if (!__i915_active_fence_set(active, fence))
|
||||
atomic_inc(&ref->count);
|
||||
__i915_active_acquire(ref);
|
||||
|
||||
out:
|
||||
i915_active_release(ref);
|
||||
return err;
|
||||
}
|
||||
|
||||
struct dma_fence *
|
||||
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
|
||||
static struct dma_fence *
|
||||
__i915_active_set_fence(struct i915_active *ref,
|
||||
struct i915_active_fence *active,
|
||||
struct dma_fence *fence)
|
||||
{
|
||||
struct dma_fence *prev;
|
||||
|
||||
/* We expect the caller to manage the exclusive timeline ordering */
|
||||
GEM_BUG_ON(i915_active_is_idle(ref));
|
||||
if (replace_barrier(ref, active)) {
|
||||
RCU_INIT_POINTER(active->fence, fence);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
prev = __i915_active_fence_set(&ref->excl, f);
|
||||
prev = __i915_active_fence_set(active, fence);
|
||||
if (prev)
|
||||
prev = dma_fence_get_rcu(prev);
else
atomic_inc(&ref->count);
__i915_active_acquire(ref);
rcu_read_unlock();

return prev;
}

static struct i915_active_fence *
__active_fence(struct i915_active *ref, u64 idx)
{
struct active_node *it;

it = __active_lookup(ref, idx);
if (unlikely(!it)) { /* Contention with parallel tree builders! */
spin_lock_irq(&ref->tree_lock);
it = __active_lookup(ref, idx);
spin_unlock_irq(&ref->tree_lock);
}
GEM_BUG_ON(!it); /* slot must be preallocated */

return &it->base;
}

struct dma_fence *
__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence)
{
/* Only valid while active, see i915_active_acquire_for_context() */
return __i915_active_set_fence(ref, __active_fence(ref, idx), fence);
}

struct dma_fence *
i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
/* We expect the caller to manage the exclusive timeline ordering */
return __i915_active_set_fence(ref, &ref->excl, f);
}

bool i915_active_acquire_if_busy(struct i915_active *ref)
{
debug_active_assert(ref);
return atomic_add_unless(&ref->count, 1, 0);
}

static void __i915_active_activate(struct i915_active *ref)
{
spin_lock_irq(&ref->tree_lock); /* __active_retire() */
if (!atomic_fetch_inc(&ref->count))
debug_active_activate(ref);
spin_unlock_irq(&ref->tree_lock);
}

int i915_active_acquire(struct i915_active *ref)
{
int err;
@ -423,19 +538,19 @@ int i915_active_acquire(struct i915_active *ref)
if (i915_active_acquire_if_busy(ref))
return 0;

if (!ref->active) {
__i915_active_activate(ref);
return 0;
}

err = mutex_lock_interruptible(&ref->mutex);
if (err)
return err;

if (likely(!i915_active_acquire_if_busy(ref))) {
if (ref->active)
err = ref->active(ref);
if (!err) {
spin_lock_irq(&ref->tree_lock); /* __active_retire() */
debug_active_activate(ref);
atomic_inc(&ref->count);
spin_unlock_irq(&ref->tree_lock);
}
err = ref->active(ref);
if (!err)
__i915_active_activate(ref);
}

mutex_unlock(&ref->mutex);
@ -443,6 +558,24 @@ int i915_active_acquire(struct i915_active *ref)
return err;
}

int i915_active_acquire_for_context(struct i915_active *ref, u64 idx)
{
struct i915_active_fence *active;
int err;

err = i915_active_acquire(ref);
if (err)
return err;

active = active_instance(ref, idx);
if (!active) {
i915_active_release(ref);
return -ENOMEM;
}

return 0; /* return with active ref */
}

void i915_active_release(struct i915_active *ref)
{
debug_active_assert(ref);
@ -651,16 +784,16 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence,
return await_active(ref, flags, sw_await_fence, fence, fence);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
debug_active_fini(ref);
GEM_BUG_ON(atomic_read(&ref->count));
GEM_BUG_ON(work_pending(&ref->work));
GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
mutex_destroy(&ref->mutex);

if (ref->cache)
kmem_cache_free(global.slab_cache, ref->cache);
}
#endif

static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
@ -674,7 +807,6 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
if (RB_EMPTY_ROOT(&ref->tree))
return NULL;

spin_lock_irq(&ref->tree_lock);
GEM_BUG_ON(i915_active_is_idle(ref));

/*
@ -700,9 +832,9 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)

prev = p;
if (node->timeline < idx)
p = p->rb_right;
p = READ_ONCE(p->rb_right);
else
p = p->rb_left;
p = READ_ONCE(p->rb_left);
}

/*
@ -739,14 +871,13 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
goto match;
}

spin_unlock_irq(&ref->tree_lock);

return NULL;

match:
spin_lock_irq(&ref->tree_lock);
rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
if (p == &ref->cache->node)
ref->cache = NULL;
WRITE_ONCE(ref->cache, NULL);
spin_unlock_irq(&ref->tree_lock);

return rb_entry(p, struct active_node, node);
@ -777,7 +908,9 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
struct llist_node *prev = first;
struct active_node *node;

rcu_read_lock();
node = reuse_idle_barrier(ref, idx);
rcu_read_unlock();
if (!node) {
node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
if (!node)
@ -801,7 +934,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
*/
RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
node->base.cb.node.prev = (void *)engine;
atomic_inc(&ref->count);
__i915_active_acquire(ref);
}
GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
@ -163,14 +163,16 @@ void __i915_active_init(struct i915_active *ref,
__i915_active_init(ref, active, retire, &__mkey, &__wkey); \
} while (0)

int i915_active_ref(struct i915_active *ref,
struct intel_timeline *tl,
struct dma_fence *fence);
struct dma_fence *
__i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);
int i915_active_ref(struct i915_active *ref, u64 idx, struct dma_fence *fence);

static inline int
i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
{
return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
return i915_active_ref(ref,
i915_request_timeline(rq)->fence_context,
&rq->fence);
}

struct dma_fence *
@ -198,7 +200,9 @@ int i915_request_await_active(struct i915_request *rq,
#define I915_ACTIVE_AWAIT_BARRIER BIT(2)

int i915_active_acquire(struct i915_active *ref);
int i915_active_acquire_for_context(struct i915_active *ref, u64 idx);
bool i915_active_acquire_if_busy(struct i915_active *ref);

void i915_active_release(struct i915_active *ref);

static inline void __i915_active_acquire(struct i915_active *ref)
@ -213,11 +217,7 @@ i915_active_is_idle(const struct i915_active *ref)
return !atomic_read(&ref->count);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref);
#else
static inline void i915_active_fini(struct i915_active *ref) { }
#endif

int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
struct intel_engine_cs *engine);
@ -231,4 +231,19 @@ struct i915_active *i915_active_create(void);
struct i915_active *i915_active_get(struct i915_active *ref);
void i915_active_put(struct i915_active *ref);

static inline int __i915_request_await_exclusive(struct i915_request *rq,
struct i915_active *active)
{
struct dma_fence *fence;
int err = 0;

fence = i915_active_fence_get(&active->excl);
if (fence) {
err = i915_request_await_dma_fence(rq, fence);
dma_fence_put(fence);
}

return err;
}

#endif /* _I915_ACTIVE_H_ */
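With the change above, i915_active is keyed by a bare u64 (typically a fence context id) rather than an intel_timeline pointer, as i915_active_add_request now shows. The following is only a rough usage sketch, not code from this commit: the wrapper name is invented, while the i915_active calls are the ones declared above. Note that __i915_active_ref hands back the previously tracked fence with a reference held (see __i915_active_set_fence at the top of this diff); the sketch simply drops it.

/* Hypothetical caller: record @fence under context index @idx. */
static int track_fence(struct i915_active *ref, u64 idx,
		       struct dma_fence *fence)
{
	struct dma_fence *prev;
	int err;

	/* Preallocate the slot for @idx and hold an active reference */
	err = i915_active_acquire_for_context(ref, idx);
	if (err)
		return err;

	/* The slot is guaranteed to exist while the acquire is held */
	prev = __i915_active_ref(ref, idx, fence);
	if (prev)
		dma_fence_put(prev); /* previously tracked fence, not needed here */

	i915_active_release(ref);
	return 0;
}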
@ -1075,6 +1075,7 @@ static void i915_driver_release(struct drm_device *dev)

intel_memory_regions_driver_release(dev_priv);
i915_ggtt_driver_release(dev_priv);
i915_gem_drain_freed_objects(dev_priv);

i915_driver_mmio_release(dev_priv);

@ -1119,7 +1120,6 @@ static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
struct drm_i915_file_private *file_priv = file->driver_priv;

i915_gem_context_close(file);
i915_gem_release(dev, file);

kfree_rcu(file_priv, rcu);

@ -203,11 +203,6 @@ struct drm_i915_file_private {
struct rcu_head rcu;
};

struct {
spinlock_t lock;
struct list_head request_list;
} mm;

struct xarray context_xa;
struct xarray vm_xa;

@ -592,11 +587,6 @@ struct i915_gem_mm {
*/
atomic_t free_count;

/**
* Small stash of WC pages
*/
struct pagestash wc_stash;

/**
* tmpfs instance used for shmem backed objects
*/
@ -1826,11 +1816,18 @@ static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915)
}

struct i915_vma * __must_check
i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
struct i915_gem_ww_ctx *ww,
const struct i915_ggtt_view *view,
u64 size, u64 alignment, u64 flags);

static inline struct i915_vma * __must_check
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
u64 size,
u64 alignment,
u64 flags);
u64 size, u64 alignment, u64 flags)
{
return i915_gem_object_ggtt_pin_ww(obj, NULL, view, size, alignment, flags);
}

int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags);
@ -1867,7 +1864,6 @@ void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
void i915_gem_resume(struct drm_i915_private *dev_priv);

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
void i915_gem_release(struct drm_device *dev, struct drm_file *file);

int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level);
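The GGTT pin helper now threads an optional i915_gem_ww_ctx through to the vma pin, while the old unlocked entry point simply forwards a NULL context. As a rough illustration only (not taken from this commit, and assuming the lock-then-pin ordering used by emit_oa_config later in this diff), a caller holding a ww context would look roughly like this; the helper name is made up.

/* Hypothetical sketch: pin an object into the GGTT under a ww context. */
static struct i915_vma *
pin_obj_ggtt(struct drm_i915_gem_object *obj, u64 alignment, u64 flags)
{
	struct i915_gem_ww_ctx ww;
	struct i915_vma *vma = NULL;
	int err;

	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err) {
		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0,
						  alignment, flags);
		if (IS_ERR(vma))
			err = PTR_ERR(vma);
	}
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww); /* drops any locks still held on the ctx */

	return err ? ERR_PTR(err) : vma;
}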
@ -335,12 +335,20 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
u64 remain;
int ret;

ret = i915_gem_object_prepare_read(obj, &needs_clflush);
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
return ret;

ret = i915_gem_object_prepare_read(obj, &needs_clflush);
if (ret) {
i915_gem_object_unlock(obj);
return ret;
}

fence = i915_gem_object_lock_fence(obj);
i915_gem_object_finish_access(obj);
i915_gem_object_unlock(obj);

if (!fence)
return -ENOMEM;

@ -420,7 +428,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!drm_mm_node_allocated(&node));
}

ret = i915_gem_object_lock_interruptible(obj);
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
goto out_unpin;

@ -619,7 +627,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!drm_mm_node_allocated(&node));
}

ret = i915_gem_object_lock_interruptible(obj);
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
goto out_unpin;

@ -734,12 +742,20 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
u64 remain;
int ret;

ret = i915_gem_object_prepare_write(obj, &needs_clflush);
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
return ret;

ret = i915_gem_object_prepare_write(obj, &needs_clflush);
if (ret) {
i915_gem_object_unlock(obj);
return ret;
}

fence = i915_gem_object_lock_fence(obj);
i915_gem_object_finish_access(obj);
i915_gem_object_unlock(obj);

if (!fence)
return -ENOMEM;

@ -946,11 +962,10 @@ static void discard_ggtt_vma(struct i915_vma *vma)
}

struct i915_vma *
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
u64 size,
u64 alignment,
u64 flags)
i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
struct i915_gem_ww_ctx *ww,
const struct i915_ggtt_view *view,
u64 size, u64 alignment, u64 flags)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = &i915->ggtt;
@ -1016,7 +1031,7 @@ new_vma:
return ERR_PTR(ret);
}

ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
ret = i915_vma_pin_ww(vma, ww, size, alignment, flags | PIN_GLOBAL);
if (ret)
return ERR_PTR(ret);

@ -1290,7 +1305,7 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
i915_gem_drain_freed_objects(i915);

list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
drm_WARN_ON(&i915->drm,
i915_gem_object_set_to_cpu_domain(obj, true));
i915_gem_object_unlock(obj);
@ -1301,21 +1316,6 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
struct drm_i915_file_private *file_priv = file->driver_priv;
struct i915_request *request;

/* Clean up our request list when the client is going away, so that
* later retire_requests won't dereference our soon-to-be-gone
* file_priv.
*/
spin_lock(&file_priv->mm.lock);
list_for_each_entry(request, &file_priv->mm.request_list, client_link)
request->file_priv = NULL;
spin_unlock(&file_priv->mm.lock);
}

int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
struct drm_i915_file_private *file_priv;
@ -1331,9 +1331,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
file_priv->dev_priv = i915;
file_priv->file = file;

spin_lock_init(&file_priv->mm.lock);
INIT_LIST_HEAD(&file_priv->mm.request_list);

file_priv->bsd_engine = -1;
file_priv->hang_timestamp = jiffies;

@ -1344,6 +1341,58 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
return ret;
}

void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
{
ww_acquire_init(&ww->ctx, &reservation_ww_class);
INIT_LIST_HEAD(&ww->obj_list);
ww->intr = intr;
ww->contended = NULL;
}

static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
{
struct drm_i915_gem_object *obj;

while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) {
list_del(&obj->obj_link);
i915_gem_object_unlock(obj);
}
}

void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj)
{
list_del(&obj->obj_link);
i915_gem_object_unlock(obj);
}

void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ww)
{
i915_gem_ww_ctx_unlock_all(ww);
WARN_ON(ww->contended);
ww_acquire_fini(&ww->ctx);
}

int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
{
int ret = 0;

if (WARN_ON(!ww->contended))
return -EINVAL;

i915_gem_ww_ctx_unlock_all(ww);
if (ww->intr)
ret = dma_resv_lock_slow_interruptible(ww->contended->base.resv, &ww->ctx);
else
dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx);

if (!ret)
list_add_tail(&ww->contended->obj_link, &ww->obj_list);

ww->contended = NULL;

return ret;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gem_device.c"
#include "selftests/i915_gem.c"
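The helpers above make up the driver's wait/wound acquire context. A minimal sketch of the intended calling pattern follows, mirroring the igt_gem_ww_ctx selftest later in this diff; the do_work_locked() critical section is invented, everything else comes from this series.

static int do_work_locked(struct drm_i915_gem_object *a,
			  struct drm_i915_gem_object *b); /* hypothetical */

static int lock_two_objects(struct drm_i915_gem_object *a,
			    struct drm_i915_gem_object *b)
{
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, true); /* interruptible slow-path locks */
retry:
	err = i915_gem_object_lock(a, &ww);
	if (!err)
		err = i915_gem_object_lock(b, &ww);
	if (!err)
		err = do_work_locked(a, b);
	if (err == -EDEADLK) {
		/* Drop every lock on this ctx, sleep on the contended one */
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww); /* unlocks anything still on ww.obj_list */
	return err;
}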
@ -116,4 +116,16 @@ static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
return test_bit(TASKLET_STATE_SCHED, &t->state);
}

struct i915_gem_ww_ctx {
struct ww_acquire_ctx ctx;
struct list_head obj_list;
bool intr;
struct drm_i915_gem_object *contended;
};

void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj);

#endif /* __I915_GEM_H__ */

@ -41,6 +41,7 @@
#include "display/intel_lpe_audio.h"
#include "display/intel_psr.h"

#include "gt/intel_breadcrumbs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm_irq.h"
@ -1195,24 +1195,39 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
struct i915_gem_engines_iter it;
struct i915_gem_context *ctx = stream->ctx;
struct intel_context *ce;
int err;
struct i915_gem_ww_ctx ww;
int err = -ENODEV;

for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
if (ce->engine != stream->engine) /* first match! */
continue;

/*
* As the ID is the gtt offset of the context's vma we
* pin the vma to ensure the ID remains fixed.
*/
err = intel_context_pin(ce);
if (err == 0) {
stream->pinned_ctx = ce;
break;
}
err = 0;
break;
}
i915_gem_context_unlock_engines(ctx);

if (err)
return ERR_PTR(err);

i915_gem_ww_ctx_init(&ww, true);
retry:
/*
* As the ID is the gtt offset of the context's vma we
* pin the vma to ensure the ID remains fixed.
*/
err = intel_context_pin_ww(ce, &ww);
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);

if (err)
return ERR_PTR(err);

stream->pinned_ctx = ce;
return stream->pinned_ctx;
}

@ -1923,15 +1938,22 @@ emit_oa_config(struct i915_perf_stream *stream,
{
struct i915_request *rq;
struct i915_vma *vma;
struct i915_gem_ww_ctx ww;
int err;

vma = get_oa_vma(stream, oa_config);
if (IS_ERR(vma))
return PTR_ERR(vma);

err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
i915_gem_ww_ctx_init(&ww, true);
retry:
err = i915_gem_object_lock(vma->obj, &ww);
if (err)
goto err_vma_put;
goto err;

err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
goto err;

intel_engine_pm_get(ce->engine);
rq = i915_request_create(ce);
@ -1953,11 +1975,9 @@ emit_oa_config(struct i915_perf_stream *stream,
goto err_add_request;
}

i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
i915_vma_unlock(vma);
if (err)
goto err_add_request;

@ -1971,7 +1991,14 @@ err_add_request:
i915_request_add(rq);
err_vma_unpin:
i915_vma_unpin(vma);
err_vma_put:
err:
if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}

i915_gem_ww_ctx_fini(&ww);
i915_vma_put(vma);
return err;
}
@ -31,6 +31,7 @@
#include <linux/sched/signal.h>

#include "gem/i915_gem_context.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_ring.h"
#include "gt/intel_rps.h"
@ -186,48 +187,34 @@ static void irq_execute_cb_hook(struct irq_work *wrk)
irq_execute_cb(wrk);
}

static void __notify_execute_cb(struct i915_request *rq)
static __always_inline void
__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk))
{
struct execute_cb *cb, *cn;

lockdep_assert_held(&rq->lock);

GEM_BUG_ON(!i915_request_is_active(rq));
if (llist_empty(&rq->execute_cb))
return;

llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode)
irq_work_queue(&cb->work);

/*
* XXX Rollback on __i915_request_unsubmit()
*
* In the future, perhaps when we have an active time-slicing scheduler,
* it will be interesting to unsubmit parallel execution and remove
* busywaits from the GPU until their master is restarted. This is
* quite hairy, we have to carefully rollback the fence and do a
* preempt-to-idle cycle on the target engine, all the while the
* master execute_cb may refire.
*/
init_llist_head(&rq->execute_cb);
llist_for_each_entry_safe(cb, cn,
llist_del_all(&rq->execute_cb),
work.llnode)
fn(&cb->work);
}

static inline void
remove_from_client(struct i915_request *request)
static void __notify_execute_cb_irq(struct i915_request *rq)
{
struct drm_i915_file_private *file_priv;
__notify_execute_cb(rq, irq_work_queue);
}

if (!READ_ONCE(request->file_priv))
return;
static bool irq_work_imm(struct irq_work *wrk)
{
wrk->func(wrk);
return false;
}

rcu_read_lock();
file_priv = xchg(&request->file_priv, NULL);
if (file_priv) {
spin_lock(&file_priv->mm.lock);
list_del(&request->client_link);
spin_unlock(&file_priv->mm.lock);
}
rcu_read_unlock();
static void __notify_execute_cb_imm(struct i915_request *rq)
{
__notify_execute_cb(rq, irq_work_imm);
}

static void free_capture_list(struct i915_request *request)
@ -274,9 +261,16 @@ static void remove_from_engine(struct i915_request *rq)
locked = engine;
}
list_del_init(&rq->sched.link);

clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);

/* Prevent further __await_execution() registering a cb, then flush */
set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);

spin_unlock_irq(&locked->active.lock);

__notify_execute_cb_imm(rq);
}

bool i915_request_retire(struct i915_request *rq)
@ -288,6 +282,7 @@ bool i915_request_retire(struct i915_request *rq)

GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
trace_i915_request_retire(rq);
i915_request_mark_complete(rq);

/*
* We know the GPU must have read the request to have
@ -305,32 +300,30 @@ bool i915_request_retire(struct i915_request *rq)
__i915_request_fill(rq, POISON_FREE);
rq->ring->head = rq->postfix;

if (!i915_request_signaled(rq)) {
spin_lock_irq(&rq->lock);
dma_fence_signal_locked(&rq->fence);
spin_unlock_irq(&rq->lock);
}

if (i915_request_has_waitboost(rq)) {
GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
atomic_dec(&rq->engine->gt->rps.num_waiters);
}

/*
* We only loosely track inflight requests across preemption,
* and so we may find ourselves attempting to retire a _completed_
* request that we have removed from the HW and put back on a run
* queue.
*
* As we set I915_FENCE_FLAG_ACTIVE on the request, this should be
* after removing the breadcrumb and signaling it, so that we do not
* inadvertently attach the breadcrumb to a completed request.
*/
remove_from_engine(rq);

spin_lock_irq(&rq->lock);
i915_request_mark_complete(rq);
if (!i915_request_signaled(rq))
dma_fence_signal_locked(&rq->fence);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
i915_request_cancel_breadcrumb(rq);
if (i915_request_has_waitboost(rq)) {
GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
atomic_dec(&rq->engine->gt->rps.num_waiters);
}
if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
__notify_execute_cb(rq);
}
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
spin_unlock_irq(&rq->lock);

remove_from_client(rq);
__list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */

intel_context_exit(rq->context);
@ -357,12 +350,6 @@ void i915_request_retire_upto(struct i915_request *rq)
} while (i915_request_retire(tmp) && tmp != rq);
}

static void __llist_add(struct llist_node *node, struct llist_head *head)
{
node->next = head->first;
head->first = node;
}

static struct i915_request * const *
__engine_active(struct intel_engine_cs *engine)
{
@ -388,17 +375,38 @@ static bool __request_in_flight(const struct i915_request *signal)
* As we know that there are always preemption points between
* requests, we know that only the currently executing request
* may be still active even though we have cleared the flag.
* However, we can't rely on our tracking of ELSP[0] to known
* However, we can't rely on our tracking of ELSP[0] to know
* which request is currently active and so maybe stuck, as
* the tracking maybe an event behind. Instead assume that
* if the context is still inflight, then it is still active
* even if the active flag has been cleared.
*
* To further complicate matters, if there a pending promotion, the HW
* may either perform a context switch to the second inflight execlists,
* or it may switch to the pending set of execlists. In the case of the
* latter, it may send the ACK and we process the event copying the
* pending[] over top of inflight[], _overwriting_ our *active. Since
* this implies the HW is arbitrating and not struck in *active, we do
* not worry about complete accuracy, but we do require no read/write
* tearing of the pointer [the read of the pointer must be valid, even
* as the array is being overwritten, for which we require the writes
* to avoid tearing.]
*
* Note that the read of *execlists->active may race with the promotion
* of execlists->pending[] to execlists->inflight[], overwritting
* the value at *execlists->active. This is fine. The promotion implies
* that we received an ACK from the HW, and so the context is not
* stuck -- if we do not see ourselves in *active, the inflight status
* is valid. If instead we see ourselves being copied into *active,
* we are inflight and may signal the callback.
*/
if (!intel_context_inflight(signal->context))
return false;

rcu_read_lock();
for (port = __engine_active(signal->engine); (rq = *port); port++) {
for (port = __engine_active(signal->engine);
(rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
port++) {
if (rq->context == signal->context) {
inflight = i915_seqno_passed(rq->fence.seqno,
signal->fence.seqno);
@ -439,18 +447,24 @@ __await_execution(struct i915_request *rq,
cb->work.func = irq_execute_cb_hook;
}

spin_lock_irq(&signal->lock);
if (i915_request_is_active(signal) || __request_in_flight(signal)) {
if (hook) {
hook(rq, &signal->fence);
i915_request_put(signal);
}
i915_sw_fence_complete(cb->fence);
kmem_cache_free(global.slab_execute_cbs, cb);
} else {
__llist_add(&cb->work.llnode, &signal->execute_cb);
/*
* Register the callback first, then see if the signaler is already
* active. This ensures that if we race with the
* __notify_execute_cb from i915_request_submit() and we are not
* included in that list, we get a second bite of the cherry and
* execute it ourselves. After this point, a future
* i915_request_submit() will notify us.
*
* In i915_request_retire() we set the ACTIVE bit on a completed
* request (then flush the execute_cb). So by registering the
* callback first, then checking the ACTIVE bit, we serialise with
* the completed/retired request.
*/
if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
if (i915_request_is_active(signal) ||
__request_in_flight(signal))
__notify_execute_cb_imm(signal);
}
spin_unlock_irq(&signal->lock);

return 0;
}
@ -566,18 +580,28 @@ xfer:
clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
}

/*
* XXX Rollback bonded-execution on __i915_request_unsubmit()?
*
* In the future, perhaps when we have an active time-slicing scheduler,
* it will be interesting to unsubmit parallel execution and remove
* busywaits from the GPU until their master is restarted. This is
* quite hairy, we have to carefully rollback the fence and do a
* preempt-to-idle cycle on the target engine, all the while the
* master execute_cb may refire.
*/
__notify_execute_cb_irq(request);

/* We may be recursing from the signal callback of another i915 fence */
if (!i915_request_signaled(request)) {
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);

__notify_execute_cb(request);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&request->fence.flags) &&
!i915_request_enable_breadcrumb(request))
intel_engine_signal_breadcrumbs(engine);

spin_unlock(&request->lock);
GEM_BUG_ON(!llist_empty(&request->execute_cb));
}

return result;
@ -600,27 +624,27 @@ void __i915_request_unsubmit(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;

/*
* Only unwind in reverse order, required so that the per-context list
* is kept in seqno/ring order.
*/
RQ_TRACE(request, "\n");

GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->active.lock);

/*
* Only unwind in reverse order, required so that the per-context list
* is kept in seqno/ring order.
* Before we remove this breadcrumb from the signal list, we have
* to ensure that a concurrent dma_fence_enable_signaling() does not
* attach itself. We first mark the request as no longer active and
* make sure that is visible to other cores, and then remove the
* breadcrumb if attached.
*/

/* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);

GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
i915_request_cancel_breadcrumb(request);

GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);

spin_unlock(&request->lock);

/* We've already spun, don't charge on resubmitting. */
if (request->sched.semaphores && i915_request_started(request))
request->sched.semaphores = 0;
@ -757,7 +781,6 @@ static void __i915_request_ctor(void *arg)

dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0);

rq->file_priv = NULL;
rq->capture_list = NULL;

init_llist_head(&rq->execute_cb);
@ -847,7 +870,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)

/* No zalloc, everything must be cleared after use */
rq->batch = NULL;
GEM_BUG_ON(rq->file_priv);
GEM_BUG_ON(rq->capture_list);
GEM_BUG_ON(!llist_empty(&rq->execute_cb));

@ -1640,7 +1662,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
return this_cpu != cpu;
}

static bool __i915_spin_request(const struct i915_request * const rq, int state)
static bool __i915_spin_request(struct i915_request * const rq, int state)
{
unsigned long timeout_ns;
unsigned int cpu;
@ -1673,7 +1695,7 @@ static bool __i915_spin_request(const struct i915_request * const rq, int state)
timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
timeout_ns += local_clock_ns(&cpu);
do {
if (i915_request_completed(rq))
if (dma_fence_is_signaled(&rq->fence))
return true;

if (signal_pending_state(state, current))
@ -1697,7 +1719,7 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
{
struct request_wait *wait = container_of(cb, typeof(*wait), cb);

wake_up_process(wait->tsk);
wake_up_process(fetch_and_zero(&wait->tsk));
}

/**
@ -1766,10 +1788,8 @@ long i915_request_wait(struct i915_request *rq,
* duration, which we currently lack.
*/
if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
__i915_spin_request(rq, state)) {
dma_fence_signal(&rq->fence);
__i915_spin_request(rq, state))
goto out;
}

/*
* This client is about to stall waiting for the GPU. In many cases
@ -1790,15 +1810,29 @@ long i915_request_wait(struct i915_request *rq,
if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
goto out;

/*
* Flush the submission tasklet, but only if it may help this request.
*
* We sometimes experience some latency between the HW interrupts and
* tasklet execution (mostly due to ksoftirqd latency, but it can also
* be due to lazy CS events), so lets run the tasklet manually if there
* is a chance it may submit this request. If the request is not ready
* to run, as it is waiting for other fences to be signaled, flushing
* the tasklet is busy work without any advantage for this client.
*
* If the HW is being lazy, this is the last chance before we go to
* sleep to catch any pending events. We will check periodically in
* the heartbeat to flush the submission tasklets as a last resort
* for unhappy HW.
*/
if (i915_request_is_ready(rq))
intel_engine_flush_submission(rq->engine);

for (;;) {
set_current_state(state);

if (i915_request_completed(rq)) {
dma_fence_signal(&rq->fence);
if (dma_fence_is_signaled(&rq->fence))
break;
}

intel_engine_flush_submission(rq->engine);

if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
@ -1814,7 +1848,9 @@ long i915_request_wait(struct i915_request *rq,
}
__set_current_state(TASK_RUNNING);

dma_fence_remove_callback(&rq->fence, &wait.cb);
if (READ_ONCE(wait.tsk))
dma_fence_remove_callback(&rq->fence, &wait.cb);
GEM_BUG_ON(!list_empty(&wait.cb.node));

out:
mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
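The execute_cb rework above drops the open-coded __llist_add() in favour of the stock llist API: producers register lock-free with llist_add(), and the flush side atomically detaches the whole list with llist_del_all() before walking it. A generic, self-contained sketch of that producer/consumer shape follows; the names are invented and this is not i915 code, only an illustration of the pattern.

#include <linux/llist.h>
#include <linux/slab.h>

struct pending_cb {
	struct llist_node node;
	void (*func)(struct pending_cb *cb);
};

/* Returns true if this was the first callback queued (list was empty). */
static bool register_cb(struct llist_head *head, struct pending_cb *cb)
{
	return llist_add(&cb->node, head);
}

static void flush_cbs(struct llist_head *head)
{
	struct pending_cb *cb, *cn;

	/* Atomically take ownership of every queued callback, then run them */
	llist_for_each_entry_safe(cb, cn, llist_del_all(head), node) {
		cb->func(cb);
		kfree(cb);
	}
}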
@ -284,10 +284,6 @@ struct i915_request {
/** timeline->request entry for this request */
struct list_head link;

struct drm_i915_file_private *file_priv;
/** file_priv list entry for this request */
struct list_head client_link;

I915_SELFTEST_DECLARE(struct {
struct list_head link;
unsigned long delay;
@ -365,10 +361,6 @@ void i915_request_submit(struct i915_request *request);
void __i915_request_unsubmit(struct i915_request *request);
void i915_request_unsubmit(struct i915_request *request);

/* Note: part of the intel_breadcrumbs family */
bool i915_request_enable_breadcrumb(struct i915_request *request);
void i915_request_cancel_breadcrumb(struct i915_request *request);

long i915_request_wait(struct i915_request *rq,
unsigned int flags,
long timeout)

@ -164,9 +164,13 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,

do {
list_for_each_entry_safe(pos, next, &x->head, entry) {
pos->func(pos,
TASK_NORMAL, fence->error,
&extra);
int wake_flags;

wake_flags = fence->error;
if (pos->func == autoremove_wake_function)
wake_flags = 0;

pos->func(pos, TASK_NORMAL, wake_flags, &extra);
}

if (list_empty(&extra))
@ -291,6 +291,8 @@ i915_vma_instance(struct drm_i915_gem_object *obj,

struct i915_vma_work {
struct dma_fence_work base;
struct i915_address_space *vm;
struct i915_vm_pt_stash stash;
struct i915_vma *vma;
struct drm_i915_gem_object *pinned;
struct i915_sw_dma_fence_cb cb;
@ -302,13 +304,10 @@ static int __vma_bind(struct dma_fence_work *work)
{
struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
struct i915_vma *vma = vw->vma;
int err;

err = vma->ops->bind_vma(vma->vm, vma, vw->cache_level, vw->flags);
if (err)
atomic_or(I915_VMA_ERROR, &vma->flags);

return err;
vma->ops->bind_vma(vw->vm, &vw->stash,
vma, vw->cache_level, vw->flags);
return 0;
}

static void __vma_release(struct dma_fence_work *work)
@ -317,6 +316,9 @@ static void __vma_release(struct dma_fence_work *work)

if (vw->pinned)
__i915_gem_object_unpin_pages(vw->pinned);

i915_vm_free_pt_stash(vw->vm, &vw->stash);
i915_vm_put(vw->vm);
}

static const struct dma_fence_work_ops bind_ops = {
@ -376,7 +378,6 @@ int i915_vma_bind(struct i915_vma *vma,
{
u32 bind_flags;
u32 vma_flags;
int ret;

GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(vma->size > vma->node.size);
@ -433,9 +434,7 @@ int i915_vma_bind(struct i915_vma *vma,
work->pinned = vma->obj;
}
} else {
ret = vma->ops->bind_vma(vma->vm, vma, cache_level, bind_flags);
if (ret)
return ret;
vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
}

atomic_or(bind_flags, &vma->flags);
@ -853,13 +852,19 @@ static void vma_unbind_pages(struct i915_vma *vma)
__vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
}

int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
u64 size, u64 alignment, u64 flags)
{
struct i915_vma_work *work = NULL;
intel_wakeref_t wakeref = 0;
unsigned int bound;
int err;

#ifdef CONFIG_PROVE_LOCKING
if (debug_locks && lockdep_is_held(&vma->vm->i915->drm.struct_mutex))
WARN_ON(!ww);
#endif

BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);

@ -873,17 +878,31 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
if (err)
return err;

if (flags & PIN_GLOBAL)
wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);

if (flags & vma->vm->bind_async_flags) {
work = i915_vma_work();
if (!work) {
err = -ENOMEM;
goto err_pages;
goto err_rpm;
}

work->vm = i915_vm_get(vma->vm);

/* Allocate enough page directories to used PTE */
if (vma->vm->allocate_va_range) {
i915_vm_alloc_pt_stash(vma->vm,
&work->stash,
vma->size);

err = i915_vm_pin_pt_stash(vma->vm,
&work->stash);
if (err)
goto err_fence;
}
}

if (flags & PIN_GLOBAL)
wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);

/*
* Differentiate between user/kernel vma inside the aliasing-ppgtt.
*
@ -971,9 +990,9 @@ err_unlock:
err_fence:
if (work)
dma_fence_work_commit_imm(&work->base);
err_rpm:
if (wakeref)
intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
err_pages:
vma_put_pages(vma);
return err;
}
@ -989,7 +1008,8 @@ static void flush_idle_contexts(struct intel_gt *gt)
intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
}

int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
u32 align, unsigned int flags)
{
struct i915_address_space *vm = vma->vm;
int err;
@ -997,7 +1017,7 @@ int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags)
GEM_BUG_ON(!i915_vma_is_ggtt(vma));

do {
err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
err = i915_vma_pin_ww(vma, ww, 0, align, flags | PIN_GLOBAL);
if (err != -ENOSPC) {
if (!err) {
err = i915_vma_wait_for_bind(vma);
@ -1167,6 +1187,12 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
list_del(&vma->obj->userfault_link);
}

static int
__i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma)
{
return __i915_request_await_exclusive(rq, &vma->active);
}

int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
{
int err;
@ -1174,8 +1200,7 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
GEM_BUG_ON(!i915_vma_is_pinned(vma));

/* Wait for the vma to be bound before we start! */
err = i915_request_await_active(rq, &vma->active,
I915_ACTIVE_AWAIT_EXCL);
err = __i915_request_await_bind(rq, vma);
if (err)
return err;
@ -237,8 +237,17 @@ static inline void i915_vma_unlock(struct i915_vma *vma)
}

int __must_check
i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags);
i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
u64 size, u64 alignment, u64 flags);

static inline int __must_check
i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
{
return i915_vma_pin_ww(vma, NULL, size, alignment, flags);
}

int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
u32 align, unsigned int flags);

static inline int i915_vma_pin_count(const struct i915_vma *vma)
{
@ -199,11 +199,52 @@ out:
return err;
}

static int igt_gem_ww_ctx(void *arg)
{
struct drm_i915_private *i915 = arg;
struct drm_i915_gem_object *obj, *obj2;
struct i915_gem_ww_ctx ww;
int err = 0;

obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj))
return PTR_ERR(obj);

obj2 = i915_gem_object_create_internal(i915, PAGE_SIZE);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto put1;
}

i915_gem_ww_ctx_init(&ww, true);
retry:
/* Lock the objects, twice for good measure (-EALREADY handling) */
err = i915_gem_object_lock(obj, &ww);
if (!err)
err = i915_gem_object_lock_interruptible(obj, &ww);
if (!err)
err = i915_gem_object_lock_interruptible(obj2, &ww);
if (!err)
err = i915_gem_object_lock(obj2, &ww);

if (err == -EDEADLK) {
err = i915_gem_ww_ctx_backoff(&ww);
if (!err)
goto retry;
}
i915_gem_ww_ctx_fini(&ww);
i915_gem_object_put(obj2);
put1:
i915_gem_object_put(obj);
return err;
}

int i915_gem_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_gem_suspend),
SUBTEST(igt_gem_hibernate),
SUBTEST(igt_gem_ww_ctx),
};

if (intel_gt_is_wedged(&i915->gt))
@ -172,35 +172,45 @@ static int igt_ppgtt_alloc(void *arg)

/* Check we can allocate the entire range */
for (size = 4096; size <= limit; size <<= 2) {
err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, size);
struct i915_vm_pt_stash stash = {};

err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size);
if (err)
goto err_ppgtt_cleanup;

err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
if (err) {
if (err == -ENOMEM) {
pr_info("[1] Ran out of memory for va_range [0 + %llx] [bit %d]\n",
size, ilog2(size));
err = 0; /* virtual space too large! */
}
i915_vm_free_pt_stash(&ppgtt->vm, &stash);
goto err_ppgtt_cleanup;
}

ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, size);
cond_resched();

ppgtt->vm.clear_range(&ppgtt->vm, 0, size);

i915_vm_free_pt_stash(&ppgtt->vm, &stash);
}

/* Check we can incrementally allocate the entire range */
for (last = 0, size = 4096; size <= limit; last = size, size <<= 2) {
err = ppgtt->vm.allocate_va_range(&ppgtt->vm,
last, size - last);
struct i915_vm_pt_stash stash = {};

err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size - last);
if (err)
goto err_ppgtt_cleanup;

err = i915_vm_pin_pt_stash(&ppgtt->vm, &stash);
if (err) {
if (err == -ENOMEM) {
pr_info("[2] Ran out of memory for va_range [%llx + %llx] [bit %d]\n",
last, size - last, ilog2(size));
err = 0; /* virtual space too large! */
}
i915_vm_free_pt_stash(&ppgtt->vm, &stash);
goto err_ppgtt_cleanup;
}

ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash,
last, size - last);
cond_resched();

i915_vm_free_pt_stash(&ppgtt->vm, &stash);
}

err_ppgtt_cleanup:
@ -284,9 +294,23 @@ static int lowlevel_hole(struct i915_address_space *vm,
break;
}

if (vm->allocate_va_range &&
vm->allocate_va_range(vm, addr, BIT_ULL(size)))
break;
if (vm->allocate_va_range) {
struct i915_vm_pt_stash stash = {};

if (i915_vm_alloc_pt_stash(vm, &stash,
BIT_ULL(size)))
break;

if (i915_vm_pin_pt_stash(vm, &stash)) {
i915_vm_free_pt_stash(vm, &stash);
break;
}

vm->allocate_va_range(vm, &stash,
addr, BIT_ULL(size));

i915_vm_free_pt_stash(vm, &stash);
}

mock_vma->pages = obj->mm.pages;
mock_vma->node.size = BIT_ULL(size);
@ -1881,6 +1905,7 @@ static int igt_cs_tlb(void *arg)
continue;

while (!__igt_timeout(end_time, NULL)) {
struct i915_vm_pt_stash stash = {};
struct i915_request *rq;
u64 offset;

@ -1888,10 +1913,6 @@ static int igt_cs_tlb(void *arg)
0, vm->total - PAGE_SIZE,
chunk_size, PAGE_SIZE);

err = vm->allocate_va_range(vm, offset, chunk_size);
if (err)
goto end;

memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32));

vma = i915_vma_instance(bbe, vm, NULL);
@ -1904,6 +1925,20 @@ static int igt_cs_tlb(void *arg)
if (err)
goto end;

err = i915_vm_alloc_pt_stash(vm, &stash, chunk_size);
if (err)
goto end;

err = i915_vm_pin_pt_stash(vm, &stash);
if (err) {
i915_vm_free_pt_stash(vm, &stash);
goto end;
}

vm->allocate_va_range(vm, &stash, offset, chunk_size);

i915_vm_free_pt_stash(vm, &stash);

/* Prime the TLB with the dummy pages */
for (i = 0; i < count; i++) {
vma->node.start = offset + i * PAGE_SIZE;
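The selftests above all follow the same preallocation dance introduced for page directories in this series. A condensed sketch of that flow as a hypothetical helper (the i915_vm_*_pt_stash calls and the stash-taking allocate_va_range() are exactly the interfaces used above; only the wrapper name is invented):

static int prealloc_and_map_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_vm_pt_stash stash = {};
	int err;

	/* Reserve enough page-table pages up front */
	err = i915_vm_alloc_pt_stash(vm, &stash, length);
	if (err)
		return err;

	err = i915_vm_pin_pt_stash(vm, &stash);
	if (err) {
		i915_vm_free_pt_stash(vm, &stash);
		return err;
	}

	/* Consumes page tables from the stash; cannot fail or allocate */
	vm->allocate_va_range(vm, &stash, start, length);

	/* Return whatever was not used */
	i915_vm_free_pt_stash(vm, &stash);
	return 0;
}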
@ -307,7 +307,7 @@ static int live_noa_gpr(void *arg)
}

/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
scratch = kmap(ce->vm->scratch[0].base.page);
scratch = kmap(__px_page(ce->vm->scratch[0]));
memset(scratch, POISON_FREE, PAGE_SIZE);

rq = intel_context_create_request(ce);
@ -405,7 +405,7 @@ static int live_noa_gpr(void *arg)
out_rq:
i915_request_put(rq);
out_ce:
kunmap(ce->vm->scratch[0].base.page);
kunmap(__px_page(ce->vm->scratch[0]));
intel_context_put(ce);
out:
stream_destroy(stream);

@ -862,6 +862,8 @@ static int live_all_engines(void *arg)
goto out_free;
}

i915_vma_lock(batch);

idx = 0;
for_each_uabi_engine(engine, i915) {
request[idx] = intel_engine_create_kernel_request(engine);
@ -872,11 +874,9 @@ static int live_all_engines(void *arg)
goto out_request;
}

i915_vma_lock(batch);
err = i915_request_await_object(request[idx], batch->obj, 0);
if (err == 0)
err = i915_vma_move_to_active(batch, request[idx], 0);
i915_vma_unlock(batch);
GEM_BUG_ON(err);

err = engine->emit_bb_start(request[idx],
@ -891,6 +891,8 @@ static int live_all_engines(void *arg)
idx++;
}

i915_vma_unlock(batch);

idx = 0;
for_each_uabi_engine(engine, i915) {
if (i915_request_completed(request[idx])) {
@ -981,12 +983,13 @@ static int live_sequential_engines(void *arg)
goto out_free;
}

i915_vma_lock(batch);
request[idx] = intel_engine_create_kernel_request(engine);
if (IS_ERR(request[idx])) {
err = PTR_ERR(request[idx]);
pr_err("%s: Request allocation failed for %s with err=%d\n",
__func__, engine->name, err);
goto out_request;
goto out_unlock;
}

if (prev) {
@ -996,16 +999,14 @@ static int live_sequential_engines(void *arg)
i915_request_add(request[idx]);
pr_err("%s: Request await failed for %s with err=%d\n",
__func__, engine->name, err);
goto out_request;
goto out_unlock;
}
}

i915_vma_lock(batch);
err = i915_request_await_object(request[idx],
batch->obj, false);
if (err == 0)
err = i915_vma_move_to_active(batch, request[idx], 0);
i915_vma_unlock(batch);
GEM_BUG_ON(err);

err = engine->emit_bb_start(request[idx],
@ -1020,6 +1021,11 @@ static int live_sequential_engines(void *arg)

prev = request[idx];
idx++;

out_unlock:
i915_vma_unlock(batch);
if (err)
goto out_request;
}

idx = 0;

@ -892,7 +892,7 @@ static int igt_vma_remapped_gtt(void *arg)
unsigned int x, y;
int err;

i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
i915_gem_object_unlock(obj);
if (err)

@ -509,7 +509,7 @@ static int igt_lmem_write_cpu(void *arg)
if (err)
goto out_unpin;

i915_gem_object_lock(obj);
i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_wc_domain(obj, true);
i915_gem_object_unlock(obj);
if (err)
@ -522,9 +522,9 @@ static int igt_lmem_write_cpu(void *arg)
goto out_unpin;
}

/* We want to throw in a random width/align */
bytes[0] = igt_random_offset(&prng, 0, PAGE_SIZE, sizeof(u32),
sizeof(u32));
/* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */
bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32));
GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32)));

i = 0;
do {

@ -38,14 +38,14 @@ static void mock_insert_entries(struct i915_address_space *vm,
{
}

static int mock_bind_ppgtt(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
static void mock_bind_ppgtt(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND);
set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma));
return 0;
}

static void mock_unbind_ppgtt(struct i915_address_space *vm,
@ -74,9 +74,12 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
ppgtt->vm.i915 = i915;
ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
ppgtt->vm.file = ERR_PTR(-ENODEV);
ppgtt->vm.dma = &i915->drm.pdev->dev;

i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);

ppgtt->vm.alloc_pt_dma = alloc_pt_dma;

ppgtt->vm.clear_range = mock_clear_range;
ppgtt->vm.insert_page = mock_insert_page;
ppgtt->vm.insert_entries = mock_insert_entries;
@ -90,13 +93,12 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
return ppgtt;
}

static int mock_bind_ggtt(struct i915_address_space *vm,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
static void mock_bind_ggtt(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
struct i915_vma *vma,
enum i915_cache_level cache_level,
u32 flags)
{
atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
return 0;
}

static void mock_unbind_ggtt(struct i915_address_space *vm,
@ -116,6 +118,8 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
ggtt->mappable_end = resource_size(&ggtt->gmadr);
ggtt->vm.total = 4096 * PAGE_SIZE;

ggtt->vm.alloc_pt_dma = alloc_pt_dma;

ggtt->vm.clear_range = mock_clear_range;
ggtt->vm.insert_page = mock_insert_page;
ggtt->vm.insert_entries = mock_insert_entries;