Merge tag 'drm-intel-next-fixes-2020-06-04' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
- Includes gvt-next-fixes-2020-05-28
- Use after free fix for display global state.
- Whitelisting context-local timestamp on Gen9 and
  two scheduler fixes with deps (Cc: stable)
- Removal of write flag from sysfs files where ineffective

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200604150454.GA59322@jlahtine-desk.ger.corp.intel.com
commit 8d286e2ff4
@@ -10,6 +10,28 @@
 #include "intel_display_types.h"
 #include "intel_global_state.h"
 
+static void __intel_atomic_global_state_free(struct kref *kref)
+{
+	struct intel_global_state *obj_state =
+		container_of(kref, struct intel_global_state, ref);
+	struct intel_global_obj *obj = obj_state->obj;
+
+	obj->funcs->atomic_destroy_state(obj, obj_state);
+}
+
+static void intel_atomic_global_state_put(struct intel_global_state *obj_state)
+{
+	kref_put(&obj_state->ref, __intel_atomic_global_state_free);
+}
+
+static struct intel_global_state *
+intel_atomic_global_state_get(struct intel_global_state *obj_state)
+{
+	kref_get(&obj_state->ref);
+
+	return obj_state;
+}
+
 void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv,
 				  struct intel_global_obj *obj,
 				  struct intel_global_state *state,
@@ -17,6 +39,10 @@ void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv,
 {
 	memset(obj, 0, sizeof(*obj));
 
+	state->obj = obj;
+
+	kref_init(&state->ref);
+
 	obj->state = state;
 	obj->funcs = funcs;
 	list_add_tail(&obj->head, &dev_priv->global_obj_list);
@@ -28,7 +54,9 @@ void intel_atomic_global_obj_cleanup(struct drm_i915_private *dev_priv)
 
 	list_for_each_entry_safe(obj, next, &dev_priv->global_obj_list, head) {
 		list_del(&obj->head);
-		obj->funcs->atomic_destroy_state(obj, obj->state);
 
+		drm_WARN_ON(&dev_priv->drm, kref_read(&obj->state->ref) != 1);
+		intel_atomic_global_state_put(obj->state);
 	}
 }
 
@@ -97,10 +125,14 @@ intel_atomic_get_global_obj_state(struct intel_atomic_state *state,
 	if (!obj_state)
 		return ERR_PTR(-ENOMEM);
 
+	obj_state->obj = obj;
 	obj_state->changed = false;
 
+	kref_init(&obj_state->ref);
+
 	state->global_objs[index].state = obj_state;
-	state->global_objs[index].old_state = obj->state;
+	state->global_objs[index].old_state =
+		intel_atomic_global_state_get(obj->state);
 	state->global_objs[index].new_state = obj_state;
 	state->global_objs[index].ptr = obj;
 	obj_state->state = state;
@@ -163,7 +195,9 @@ void intel_atomic_swap_global_state(struct intel_atomic_state *state)
 		new_obj_state->state = NULL;
 
 		state->global_objs[i].state = old_obj_state;
-		obj->state = new_obj_state;
 
+		intel_atomic_global_state_put(obj->state);
+		obj->state = intel_atomic_global_state_get(new_obj_state);
 	}
 }
 
@@ -172,10 +206,9 @@ void intel_atomic_clear_global_state(struct intel_atomic_state *state)
 	int i;
 
 	for (i = 0; i < state->num_global_objs; i++) {
-		struct intel_global_obj *obj = state->global_objs[i].ptr;
+		intel_atomic_global_state_put(state->global_objs[i].old_state);
+		intel_atomic_global_state_put(state->global_objs[i].new_state);
 
-		obj->funcs->atomic_destroy_state(obj,
-						 state->global_objs[i].state);
 		state->global_objs[i].ptr = NULL;
 		state->global_objs[i].state = NULL;
 		state->global_objs[i].old_state = NULL;
@@ -6,6 +6,7 @@
 #ifndef __INTEL_GLOBAL_STATE_H__
 #define __INTEL_GLOBAL_STATE_H__
 
+#include <linux/kref.h>
 #include <linux/list.h>
 
 struct drm_i915_private;
@@ -54,7 +55,9 @@ struct intel_global_obj {
 	for_each_if(obj)
 
 struct intel_global_state {
+	struct intel_global_obj *obj;
 	struct intel_atomic_state *state;
+	struct kref ref;
 	bool changed;
 };
 
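The two hunks above are the "use after free fix for display global state" from the tag: the old-state pointer now pins its object with a kref, so the state is only destroyed once the last holder drops its reference. For readers unfamiliar with the kref idiom, here is a minimal userspace sketch of the same pattern; it is illustrative only (plain C11 atomics standing in for struct kref, names invented), not the i915 code.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for a refcounted state object (think struct kref). */
struct state {
	atomic_int ref;		/* like struct kref */
	int payload;
};

static struct state *state_create(int payload)
{
	struct state *s = malloc(sizeof(*s));

	if (!s)
		abort();
	atomic_init(&s->ref, 1);	/* like kref_init(): creator owns one ref */
	s->payload = payload;
	return s;
}

static struct state *state_get(struct state *s)
{
	atomic_fetch_add(&s->ref, 1);	/* like kref_get() */
	return s;
}

static void state_put(struct state *s)
{
	/* like kref_put(): release only when the last reference is dropped */
	if (atomic_fetch_sub(&s->ref, 1) == 1) {
		printf("releasing state %d\n", s->payload);
		free(s);
	}
}

int main(void)
{
	struct state *cur = state_create(42);
	struct state *old = state_get(cur);	/* "old state" pointer keeps it alive */

	state_put(cur);				/* owner drops its ref: nothing freed yet */
	printf("old still valid: %d\n", old->payload);
	state_put(old);				/* last reference gone: released here */
	return 0;
}

The design point mirrored from the diff: whoever stores a pointer (here old) takes its own reference, so dropping the "current" pointer can never free memory that someone else still dereferences.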
@@ -230,7 +230,7 @@ static void intel_context_set_gem(struct intel_context *ce,
 		ce->timeline = intel_timeline_get(ctx->timeline);
 
 	if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
-	    intel_engine_has_semaphores(ce->engine))
+	    intel_engine_has_timeslices(ce->engine))
 		__set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
 }
 
@@ -1969,7 +1969,7 @@ static int __apply_priority(struct intel_context *ce, void *arg)
 {
 	struct i915_gem_context *ctx = arg;
 
-	if (!intel_engine_has_semaphores(ce->engine))
+	if (!intel_engine_has_timeslices(ce->engine))
 		return 0;
 
 	if (ctx->sched.priority >= I915_PRIORITY_NORMAL)
@@ -39,7 +39,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 	unsigned long last_pfn = 0; /* suppress gcc warning */
 	unsigned int max_segment = i915_sg_segment_size();
 	unsigned int sg_page_sizes;
-	struct pagevec pvec;
 	gfp_t noreclaim;
 	int ret;
 
@@ -192,13 +191,17 @@ err_sg:
 	sg_mark_end(sg);
 err_pages:
 	mapping_clear_unevictable(mapping);
-	pagevec_init(&pvec);
-	for_each_sgt_page(page, sgt_iter, st) {
-		if (!pagevec_add(&pvec, page))
+	if (sg != st->sgl) {
+		struct pagevec pvec;
+
+		pagevec_init(&pvec);
+		for_each_sgt_page(page, sgt_iter, st) {
+			if (!pagevec_add(&pvec, page))
+				check_release_pagevec(&pvec);
+		}
+		if (pagevec_count(&pvec))
 			check_release_pagevec(&pvec);
 	}
-	if (pagevec_count(&pvec))
-		check_release_pagevec(&pvec);
 	sg_free_table(st);
 	kfree(st);
@@ -97,8 +97,6 @@ int __intel_context_do_pin(struct intel_context *ce)
 {
 	int err;
 
-	GEM_BUG_ON(intel_context_is_closed(ce));
-
 	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
 		err = intel_context_alloc_state(ce);
 		if (err)
@@ -124,7 +124,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 	 */
 	low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE;
 	high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE;
-	num_types = sizeof(vgpu_types) / sizeof(vgpu_types[0]);
+	num_types = ARRAY_SIZE(vgpu_types);
 
 	gvt->types = kcalloc(num_types, sizeof(struct intel_vgpu_type),
 			     GFP_KERNEL);
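The gvt hunk above is a pure readability change: ARRAY_SIZE(x) is the conventional spelling of sizeof(x)/sizeof((x)[0]) (the kernel's macro additionally rejects plain pointers). A small userspace sketch of the equivalence, with an illustrative vgpu_type layout that is not the real structure:

#include <stdio.h>

/* Simplified version of the kernel's ARRAY_SIZE(); the real macro also
 * trips a build error when given a pointer instead of an array. */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

struct vgpu_type { const char *name; unsigned int weight; };

static const struct vgpu_type vgpu_types[] = {
	{ "8",  2 },
	{ "4",  4 },
	{ "2",  8 },
	{ "1", 16 },
};

int main(void)
{
	/* Same value either way; the macro simply states the intent. */
	printf("%zu %zu\n",
	       sizeof(vgpu_types) / sizeof(vgpu_types[0]),
	       ARRAY_SIZE(vgpu_types));
	return 0;
}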
@@ -572,6 +572,9 @@ struct drm_i915_reg_descriptor {
 #define REG32(_reg, ...) \
 	{ .addr = (_reg), __VA_ARGS__ }
 
+#define REG32_IDX(_reg, idx) \
+	{ .addr = _reg(idx) }
+
 /*
  * Convenience macro for adding 64-bit registers.
  *
@@ -669,6 +672,7 @@ static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
 	REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
 	REG32(BCS_SWCTRL),
 	REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
+	REG32_IDX(RING_CTX_TIMESTAMP, BLT_RING_BASE),
 	REG64_IDX(BCS_GPR, 0),
 	REG64_IDX(BCS_GPR, 1),
 	REG64_IDX(BCS_GPR, 2),
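For context, this is the "whitelisting context-local timestamp on Gen9" item: the command parser validates register offsets referenced by user batches against per-engine tables such as gen9_blt_regs[], and the new REG32_IDX entry simply admits one more offset (RING_CTX_TIMESTAMP at the BLT ring base). A rough sketch of the lookup idea follows; it is an illustration under assumptions, not the i915 implementation, and the addresses, struct layout and linear search are all simplified stand-ins.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative register-whitelist check; addresses are placeholders. */
struct reg_descriptor {
	uint32_t addr;
};

#define REG32(_reg)	{ .addr = (_reg) }

static const struct reg_descriptor blt_regs[] = {
	REG32(0x22358),	/* e.g. a ring timestamp register */
	REG32(0x223a8),	/* e.g. a context-local timestamp register */
	REG32(0x22200),	/* e.g. an engine control register */
};

static bool reg_allowed(uint32_t addr)
{
	for (size_t i = 0; i < sizeof(blt_regs) / sizeof(blt_regs[0]); i++)
		if (blt_regs[i].addr == addr)
			return true;
	return false;	/* unknown register: the command is rejected */
}

int main(void)
{
	printf("0x223a8 allowed: %d\n", reg_allowed(0x223a8));
	printf("0x12345 allowed: %d\n", reg_allowed(0x12345));
	return 0;
}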
@@ -173,7 +173,7 @@ i915_param_named(enable_gvt, bool, 0400,
 #endif
 
 #if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)
-i915_param_named_unsafe(fake_lmem_start, ulong, 0600,
+i915_param_named_unsafe(fake_lmem_start, ulong, 0400,
 	"Fake LMEM start offset (default: 0)");
 #endif
 
@@ -64,7 +64,7 @@ struct drm_printer;
 	param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \
 	param(int, edp_vswing, 0, 0400) \
 	param(unsigned int, reset, 3, 0600) \
-	param(unsigned int, inject_probe_failure, 0, 0600) \
+	param(unsigned int, inject_probe_failure, 0, 0) \
 	param(int, fastboot, -1, 0600) \
 	param(int, enable_dpcd_backlight, -1, 0600) \
 	param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \
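These two parameter hunks are the "removal of write flag from sysfs files where ineffective" item: 0600 exposes a writable /sys/module/i915/parameters/ entry even though writing these parameters after probe has no effect, 0400 leaves the entry readable by root only, and a permission of 0 means no sysfs entry is created at all. A tiny sketch decoding the octal modes with standard POSIX bits (nothing i915-specific is assumed here):

#include <stdio.h>
#include <sys/stat.h>

static void describe(unsigned int mode)
{
	printf("%04o: owner %s%s\n", mode,
	       (mode & S_IRUSR) ? "r" : "-",
	       (mode & S_IWUSR) ? "w" : "-");
}

int main(void)
{
	describe(0600);	/* read/write for owner: parameter looks writable */
	describe(0400);	/* read-only for owner: the write bit is dropped */
	describe(0);	/* module parameters with perm 0 get no sysfs entry */
	return 0;
}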
@@ -121,8 +121,39 @@ static void i915_fence_release(struct dma_fence *fence)
 	i915_sw_fence_fini(&rq->submit);
 	i915_sw_fence_fini(&rq->semaphore);
 
-	/* Keep one request on each engine for reserved use under mempressure */
-	if (!cmpxchg(&rq->engine->request_pool, NULL, rq))
+	/*
+	 * Keep one request on each engine for reserved use under mempressure.
+	 *
+	 * We do not hold a reference to the engine here and so have to be
+	 * very careful in what rq->engine we poke. The virtual engine is
+	 * referenced via the rq->context and we released that ref during
+	 * i915_request_retire(), ergo we must not dereference a virtual
+	 * engine here. Not that we would want to, as the only consumer of
+	 * the reserved engine->request_pool is the power management parking,
+	 * which must-not-fail, and that is only run on the physical engines.
+	 *
+	 * Since the request must have been executed to have completed,
+	 * we know that it will have been processed by the HW and will
+	 * not be unsubmitted again, so rq->engine and rq->execution_mask
+	 * at this point are stable. rq->execution_mask will be a single
+	 * bit if the last and _only_ engine it could execute on was a
+	 * physical engine; if it's multiple bits then it started on and
+	 * could still be on a virtual engine. Thus if the mask is not a
+	 * power-of-two we assume that rq->engine may still be a virtual
+	 * engine and so a dangling invalid pointer that we cannot dereference.
+	 *
+	 * For example, consider the flow of a bonded request through a virtual
+	 * engine. The request is created with a wide engine mask (all engines
+	 * that we might execute on). On processing the bond, the request mask
+	 * is reduced to one or more engines. If the request is subsequently
+	 * bound to a single engine, it will then be constrained to only
+	 * execute on that engine and never returned to the virtual engine
+	 * after timeslicing away, see __unwind_incomplete_requests(). Thus we
+	 * know that if the rq->execution_mask is a single bit, rq->engine
+	 * can be a physical engine with the exact corresponding mask.
+	 */
+	if (is_power_of_2(rq->execution_mask) &&
+	    !cmpxchg(&rq->engine->request_pool, NULL, rq))
 		return;
 
 	kmem_cache_free(global.slab_requests, rq);
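Two idioms in the hunk above are worth unpacking. !cmpxchg(&rq->engine->request_pool, NULL, rq) parks the request in a one-slot per-engine pool only if the slot is currently empty, without taking a lock; and is_power_of_2(rq->execution_mask) tests for "exactly one engine bit set", i.e. the request can no longer be riding a virtual engine. A standalone sketch of both follows, with C11 atomics standing in for the kernel's cmpxchg(); the struct and names are invented for illustration, this is not the i915 code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct request { int id; };

/* One-slot "reserve" pool, in the spirit of engine->request_pool. */
static _Atomic(struct request *) request_pool;

/* Park rq in the pool if the slot is empty; return true on success. */
static bool pool_park(struct request *rq)
{
	struct request *expected = NULL;

	/* Equivalent in spirit to: !cmpxchg(&request_pool, NULL, rq) */
	return atomic_compare_exchange_strong(&request_pool, &expected, rq);
}

/* True iff exactly one bit is set (and x != 0), like is_power_of_2(). */
static bool is_power_of_2(unsigned long x)
{
	return x && (x & (x - 1)) == 0;
}

int main(void)
{
	struct request a = { 1 }, b = { 2 };

	printf("park a: %d\n", pool_park(&a));	/* 1: slot was empty */
	printf("park b: %d\n", pool_park(&b));	/* 0: slot already taken */

	printf("%d %d %d\n",
	       is_power_of_2(0x4),	/* 1: a single engine bit */
	       is_power_of_2(0x6),	/* 0: still a multi-engine mask */
	       is_power_of_2(0));	/* 0 */
	return 0;
}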
@@ -326,6 +357,53 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
+static struct i915_request * const *
+__engine_active(struct intel_engine_cs *engine)
+{
+	return READ_ONCE(engine->execlists.active);
+}
+
+static bool __request_in_flight(const struct i915_request *signal)
+{
+	struct i915_request * const *port, *rq;
+	bool inflight = false;
+
+	if (!i915_request_is_ready(signal))
+		return false;
+
+	/*
+	 * Even if we have unwound the request, it may still be on
+	 * the GPU (preempt-to-busy). If that request is inside an
+	 * unpreemptible critical section, it will not be removed. Some
+	 * GPU functions may even be stuck waiting for the paired request
+	 * (__await_execution) to be submitted and cannot be preempted
+	 * until the bond is executing.
+	 *
+	 * As we know that there are always preemption points between
+	 * requests, we know that only the currently executing request
+	 * may still be active even though we have cleared the flag.
+	 * However, we can't rely on our tracking of ELSP[0] to know
+	 * which request is currently active and so may be stuck, as
+	 * the tracking may be an event behind. Instead assume that
+	 * if the context is still inflight, then it is still active
+	 * even if the active flag has been cleared.
+	 */
+	if (!intel_context_inflight(signal->context))
+		return false;
+
+	rcu_read_lock();
+	for (port = __engine_active(signal->engine); (rq = *port); port++) {
+		if (rq->context == signal->context) {
+			inflight = i915_seqno_passed(rq->fence.seqno,
+						     signal->fence.seqno);
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return inflight;
+}
+
 static int
 __await_execution(struct i915_request *rq,
 		  struct i915_request *signal,
@@ -356,7 +434,7 @@ __await_execution(struct i915_request *rq,
 	}
 
 	spin_lock_irq(&signal->lock);
-	if (i915_request_is_active(signal)) {
+	if (i915_request_is_active(signal) || __request_in_flight(signal)) {
 		if (hook) {
 			hook(rq, &signal->fence);
 			i915_request_put(signal);
@@ -1022,148 +1100,6 @@ await_fence:
 					     I915_FENCE_GFP);
 }
 
-static int
-i915_request_await_request(struct i915_request *to, struct i915_request *from)
-{
-	int ret;
-
-	GEM_BUG_ON(to == from);
-	GEM_BUG_ON(to->timeline == from->timeline);
-
-	if (i915_request_completed(from)) {
-		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
-		return 0;
-	}
-
-	if (to->engine->schedule) {
-		ret = i915_sched_node_add_dependency(&to->sched,
-						     &from->sched,
-						     I915_DEPENDENCY_EXTERNAL);
-		if (ret < 0)
-			return ret;
-	}
-
-	if (to->engine == from->engine)
-		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
-						       &from->submit,
-						       I915_FENCE_GFP);
-	else
-		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static void mark_external(struct i915_request *rq)
-{
-	/*
-	 * The downside of using semaphores is that we lose metadata passing
-	 * along the signaling chain. This is particularly nasty when we
-	 * need to pass along a fatal error such as EFAULT or EDEADLK. For
-	 * fatal errors we want to scrub the request before it is executed,
-	 * which means that we cannot preload the request onto HW and have
-	 * it wait upon a semaphore.
-	 */
-	rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN;
-}
-
-static int
-__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
-{
-	mark_external(rq);
-	return i915_sw_fence_await_dma_fence(&rq->submit, fence,
-					     i915_fence_context_timeout(rq->i915,
-									fence->context),
-					     I915_FENCE_GFP);
-}
-
-static int
-i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
-{
-	struct dma_fence *iter;
-	int err = 0;
-
-	if (!to_dma_fence_chain(fence))
-		return __i915_request_await_external(rq, fence);
-
-	dma_fence_chain_for_each(iter, fence) {
-		struct dma_fence_chain *chain = to_dma_fence_chain(iter);
-
-		if (!dma_fence_is_i915(chain->fence)) {
-			err = __i915_request_await_external(rq, iter);
-			break;
-		}
-
-		err = i915_request_await_dma_fence(rq, chain->fence);
-		if (err < 0)
-			break;
-	}
-
-	dma_fence_put(iter);
-	return err;
-}
-
-int
-i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
-{
-	struct dma_fence **child = &fence;
-	unsigned int nchild = 1;
-	int ret;
-
-	/*
-	 * Note that if the fence-array was created in signal-on-any mode,
-	 * we should *not* decompose it into its individual fences. However,
-	 * we don't currently store which mode the fence-array is operating
-	 * in. Fortunately, the only user of signal-on-any is private to
-	 * amdgpu and we should not see any incoming fence-array from
-	 * sync-file being in signal-on-any mode.
-	 */
-	if (dma_fence_is_array(fence)) {
-		struct dma_fence_array *array = to_dma_fence_array(fence);
-
-		child = array->fences;
-		nchild = array->num_fences;
-		GEM_BUG_ON(!nchild);
-	}
-
-	do {
-		fence = *child++;
-		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-			i915_sw_fence_set_error_once(&rq->submit, fence->error);
-			continue;
-		}
-
-		/*
-		 * Requests on the same timeline are explicitly ordered, along
-		 * with their dependencies, by i915_request_add() which ensures
-		 * that requests are submitted in-order through each ring.
-		 */
-		if (fence->context == rq->fence.context)
-			continue;
-
-		/* Squash repeated waits to the same timelines */
-		if (fence->context &&
-		    intel_timeline_sync_is_later(i915_request_timeline(rq),
-						 fence))
-			continue;
-
-		if (dma_fence_is_i915(fence))
-			ret = i915_request_await_request(rq, to_request(fence));
-		else
-			ret = i915_request_await_external(rq, fence);
-		if (ret < 0)
-			return ret;
-
-		/* Record the latest fence used against each timeline */
-		if (fence->context)
-			intel_timeline_sync_set(i915_request_timeline(rq),
-						fence);
-	} while (--nchild);
-
-	return 0;
-}
-
 static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
 					  struct dma_fence *fence)
 {
@@ -1251,6 +1187,55 @@ __i915_request_await_execution(struct i915_request *to,
 					  &from->fence);
 }
 
+static void mark_external(struct i915_request *rq)
+{
+	/*
+	 * The downside of using semaphores is that we lose metadata passing
+	 * along the signaling chain. This is particularly nasty when we
+	 * need to pass along a fatal error such as EFAULT or EDEADLK. For
+	 * fatal errors we want to scrub the request before it is executed,
+	 * which means that we cannot preload the request onto HW and have
+	 * it wait upon a semaphore.
+	 */
+	rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN;
+}
+
+static int
+__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
+{
+	mark_external(rq);
+	return i915_sw_fence_await_dma_fence(&rq->submit, fence,
+					     i915_fence_context_timeout(rq->i915,
+									fence->context),
+					     I915_FENCE_GFP);
+}
+
+static int
+i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
+{
+	struct dma_fence *iter;
+	int err = 0;
+
+	if (!to_dma_fence_chain(fence))
+		return __i915_request_await_external(rq, fence);
+
+	dma_fence_chain_for_each(iter, fence) {
+		struct dma_fence_chain *chain = to_dma_fence_chain(iter);
+
+		if (!dma_fence_is_i915(chain->fence)) {
+			err = __i915_request_await_external(rq, iter);
+			break;
+		}
+
+		err = i915_request_await_dma_fence(rq, chain->fence);
+		if (err < 0)
+			break;
+	}
+
+	dma_fence_put(iter);
+	return err;
+}
+
 int
 i915_request_await_execution(struct i915_request *rq,
 			     struct dma_fence *fence,
@@ -1299,6 +1284,116 @@ i915_request_await_execution(struct i915_request *rq,
 	return 0;
 }
 
+static int
+await_request_submit(struct i915_request *to, struct i915_request *from)
+{
+	/*
+	 * If we are waiting on a virtual engine, then it may be
+	 * constrained to execute on a single engine *prior* to submission.
+	 * When it is submitted, it will be first submitted to the virtual
+	 * engine and then passed to the physical engine. We cannot allow
+	 * the waiter to be submitted immediately to the physical engine
+	 * as it may then bypass the virtual request.
+	 */
+	if (to->engine == READ_ONCE(from->engine))
+		return i915_sw_fence_await_sw_fence_gfp(&to->submit,
+							&from->submit,
+							I915_FENCE_GFP);
+	else
+		return __i915_request_await_execution(to, from, NULL);
+}
+
+static int
+i915_request_await_request(struct i915_request *to, struct i915_request *from)
+{
+	int ret;
+
+	GEM_BUG_ON(to == from);
+	GEM_BUG_ON(to->timeline == from->timeline);
+
+	if (i915_request_completed(from)) {
+		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
+		return 0;
+	}
+
+	if (to->engine->schedule) {
+		ret = i915_sched_node_add_dependency(&to->sched,
+						     &from->sched,
+						     I915_DEPENDENCY_EXTERNAL);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
+		ret = await_request_submit(to, from);
+	else
+		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+int
+i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
+{
+	struct dma_fence **child = &fence;
+	unsigned int nchild = 1;
+	int ret;
+
+	/*
+	 * Note that if the fence-array was created in signal-on-any mode,
+	 * we should *not* decompose it into its individual fences. However,
+	 * we don't currently store which mode the fence-array is operating
+	 * in. Fortunately, the only user of signal-on-any is private to
+	 * amdgpu and we should not see any incoming fence-array from
+	 * sync-file being in signal-on-any mode.
+	 */
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+
+		child = array->fences;
+		nchild = array->num_fences;
+		GEM_BUG_ON(!nchild);
+	}
+
+	do {
+		fence = *child++;
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
+			i915_sw_fence_set_error_once(&rq->submit, fence->error);
+			continue;
+		}
+
+		/*
+		 * Requests on the same timeline are explicitly ordered, along
+		 * with their dependencies, by i915_request_add() which ensures
+		 * that requests are submitted in-order through each ring.
+		 */
+		if (fence->context == rq->fence.context)
+			continue;
+
+		/* Squash repeated waits to the same timelines */
+		if (fence->context &&
+		    intel_timeline_sync_is_later(i915_request_timeline(rq),
+						 fence))
+			continue;
+
+		if (dma_fence_is_i915(fence))
+			ret = i915_request_await_request(rq, to_request(fence));
+		else
+			ret = i915_request_await_external(rq, fence);
+		if (ret < 0)
+			return ret;
+
+		/* Record the latest fence used against each timeline */
+		if (fence->context)
+			intel_timeline_sync_set(i915_request_timeline(rq),
+						fence);
+	} while (--nchild);
+
+	return 0;
+}
+
 /**
  * i915_request_await_object - set this request to (async) wait upon a bo
  * @to: request we are wishing to use
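In the relocated i915_request_await_request() above, the submit-fence path is now chosen by is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)): given non-zero masks, the OR is a power of two only when both requests are pinned to the same single physical engine, so anything that might still involve a virtual engine falls through to the semaphore / await-execution path. A small check of that bitmask property; the mask values below are hypothetical and this is not i915 code.

#include <stdbool.h>
#include <stdio.h>

static bool is_power_of_2(unsigned long x)
{
	return x && (x & (x - 1)) == 0;
}

int main(void)
{
	/* Hypothetical engine masks: bit N means "may run on engine N". */
	unsigned long rcs0 = 0x1, bcs0 = 0x2, virt = 0x6; /* virtual: two engines */

	printf("%d\n", is_power_of_2(rcs0 | rcs0)); /* 1: same single engine    */
	printf("%d\n", is_power_of_2(rcs0 | bcs0)); /* 0: two different engines */
	printf("%d\n", is_power_of_2(bcs0 | virt)); /* 0: one side is virtual   */
	return 0;
}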
@@ -209,14 +209,6 @@ static void kick_submission(struct intel_engine_cs *engine,
 	if (!inflight)
 		goto unlock;
 
-	ENGINE_TRACE(engine,
-		     "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n",
-		     prio,
-		     rq->fence.context, rq->fence.seqno,
-		     inflight->fence.context, inflight->fence.seqno,
-		     inflight->sched.attr.priority);
-	engine->execlists.queue_priority_hint = prio;
-
 	/*
 	 * If we are already the currently executing context, don't
 	 * bother evaluating if we should preempt ourselves.
@@ -224,6 +216,14 @@ static void kick_submission(struct intel_engine_cs *engine,
 	if (inflight->context == rq->context)
 		goto unlock;
 
+	ENGINE_TRACE(engine,
+		     "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n",
+		     prio,
+		     rq->fence.context, rq->fence.seqno,
+		     inflight->fence.context, inflight->fence.seqno,
+		     inflight->sched.attr.priority);
+
+	engine->execlists.queue_priority_hint = prio;
 	if (need_preempt(prio, rq_prio(inflight)))
 		tasklet_hi_schedule(&engine->execlists.tasklet);
 