drm/i915: Remove requirement for holding i915_request.lock for breadcrumbs
Since the breadcrumb enabling/cancelling itself is serialised by the breadcrumbs.irq_lock, with a bit of care we can remove the outer serialisation with i915_request.lock for concurrent dma_fence_enable_signaling(). This has the important side-effect of eliminating the nested i915_request.lock within request submission. The challenge in serialisation is around the unsubmission where we take an active request that wants a breadcrumb on the signaling engine and put it to sleep. We do not want a concurrent dma_fence_enable_signaling() to attach a breadcrumb as we unsubmit, so we must mark the request as no longer active before serialising with the concurrent enable-signaling. On retire, we serialise with the concurrent enable-signaling, but instead of clearing ACTIVE, we mark it as SIGNALED. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200731154834.8378-1-chris@chris-wilson.co.uk Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> [Joonas: Rebased and reordered into drm-intel-gt-next branch] Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
This commit is contained in:
parent
af5c6fcf40
commit
c18636f763
@ -220,17 +220,17 @@ static void signal_irq_work(struct irq_work *work)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
|
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
|
||||||
{
|
{
|
||||||
struct intel_engine_cs *engine =
|
struct intel_engine_cs *engine =
|
||||||
container_of(b, struct intel_engine_cs, breadcrumbs);
|
container_of(b, struct intel_engine_cs, breadcrumbs);
|
||||||
|
|
||||||
lockdep_assert_held(&b->irq_lock);
|
lockdep_assert_held(&b->irq_lock);
|
||||||
if (b->irq_armed)
|
if (b->irq_armed)
|
||||||
return true;
|
return;
|
||||||
|
|
||||||
if (!intel_gt_pm_get_if_awake(engine->gt))
|
if (!intel_gt_pm_get_if_awake(engine->gt))
|
||||||
return false;
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The breadcrumb irq will be disarmed on the interrupt after the
|
* The breadcrumb irq will be disarmed on the interrupt after the
|
||||||
@ -250,8 +250,6 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
|
|||||||
|
|
||||||
if (!b->irq_enabled++)
|
if (!b->irq_enabled++)
|
||||||
irq_enable(engine);
|
irq_enable(engine);
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
|
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
|
||||||
@ -310,57 +308,99 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void insert_breadcrumb(struct i915_request *rq,
|
||||||
|
struct intel_breadcrumbs *b)
|
||||||
|
{
|
||||||
|
struct intel_context *ce = rq->context;
|
||||||
|
struct list_head *pos;
|
||||||
|
|
||||||
|
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
|
||||||
|
return;
|
||||||
|
|
||||||
|
__intel_breadcrumbs_arm_irq(b);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We keep the seqno in retirement order, so we can break
|
||||||
|
* inside intel_engine_signal_breadcrumbs as soon as we've
|
||||||
|
* passed the last completed request (or seen a request that
|
||||||
|
* hasn't event started). We could walk the timeline->requests,
|
||||||
|
* but keeping a separate signalers_list has the advantage of
|
||||||
|
* hopefully being much smaller than the full list and so
|
||||||
|
* provides faster iteration and detection when there are no
|
||||||
|
* more interrupts required for this context.
|
||||||
|
*
|
||||||
|
* We typically expect to add new signalers in order, so we
|
||||||
|
* start looking for our insertion point from the tail of
|
||||||
|
* the list.
|
||||||
|
*/
|
||||||
|
list_for_each_prev(pos, &ce->signals) {
|
||||||
|
struct i915_request *it =
|
||||||
|
list_entry(pos, typeof(*it), signal_link);
|
||||||
|
|
||||||
|
if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
list_add(&rq->signal_link, pos);
|
||||||
|
if (pos == &ce->signals) /* catch transitions from empty list */
|
||||||
|
list_move_tail(&ce->signal_link, &b->signalers);
|
||||||
|
GEM_BUG_ON(!check_signal_order(ce, rq));
|
||||||
|
|
||||||
|
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
|
||||||
|
}
|
||||||
|
|
||||||
bool i915_request_enable_breadcrumb(struct i915_request *rq)
|
bool i915_request_enable_breadcrumb(struct i915_request *rq)
|
||||||
{
|
{
|
||||||
lockdep_assert_held(&rq->lock);
|
struct intel_breadcrumbs *b;
|
||||||
|
|
||||||
|
/* Serialises with i915_request_retire() using rq->lock */
|
||||||
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
|
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
|
/*
|
||||||
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
|
* Peek at i915_request_submit()/i915_request_unsubmit() status.
|
||||||
struct intel_context *ce = rq->context;
|
*
|
||||||
struct list_head *pos;
|
* If the request is not yet active (and not signaled), we will
|
||||||
|
* attach the breadcrumb later.
|
||||||
|
*/
|
||||||
|
if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
|
||||||
|
return true;
|
||||||
|
|
||||||
spin_lock(&b->irq_lock);
|
/*
|
||||||
|
* rq->engine is locked by rq->engine->active.lock. That however
|
||||||
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
|
* is not known until after rq->engine has been dereferenced and
|
||||||
goto unlock;
|
* the lock acquired. Hence we acquire the lock and then validate
|
||||||
|
* that rq->engine still matches the lock we hold for it.
|
||||||
if (!__intel_breadcrumbs_arm_irq(b))
|
*
|
||||||
goto unlock;
|
* Here, we are using the breadcrumb lock as a proxy for the
|
||||||
|
* rq->engine->active.lock, and we know that since the breadcrumb
|
||||||
/*
|
* will be serialised within i915_request_submit/i915_request_unsubmit,
|
||||||
* We keep the seqno in retirement order, so we can break
|
* the engine cannot change while active as long as we hold the
|
||||||
* inside intel_engine_signal_breadcrumbs as soon as we've
|
* breadcrumb lock on that engine.
|
||||||
* passed the last completed request (or seen a request that
|
*
|
||||||
* hasn't event started). We could walk the timeline->requests,
|
* From the dma_fence_enable_signaling() path, we are outside of the
|
||||||
* but keeping a separate signalers_list has the advantage of
|
* request submit/unsubmit path, and so we must be more careful to
|
||||||
* hopefully being much smaller than the full list and so
|
* acquire the right lock.
|
||||||
* provides faster iteration and detection when there are no
|
*/
|
||||||
* more interrupts required for this context.
|
b = &READ_ONCE(rq->engine)->breadcrumbs;
|
||||||
*
|
spin_lock(&b->irq_lock);
|
||||||
* We typically expect to add new signalers in order, so we
|
while (unlikely(b != &READ_ONCE(rq->engine)->breadcrumbs)) {
|
||||||
* start looking for our insertion point from the tail of
|
|
||||||
* the list.
|
|
||||||
*/
|
|
||||||
list_for_each_prev(pos, &ce->signals) {
|
|
||||||
struct i915_request *it =
|
|
||||||
list_entry(pos, typeof(*it), signal_link);
|
|
||||||
|
|
||||||
if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
list_add(&rq->signal_link, pos);
|
|
||||||
if (pos == &ce->signals) /* catch transitions from empty list */
|
|
||||||
list_move_tail(&ce->signal_link, &b->signalers);
|
|
||||||
GEM_BUG_ON(!check_signal_order(ce, rq));
|
|
||||||
|
|
||||||
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
|
|
||||||
unlock:
|
|
||||||
spin_unlock(&b->irq_lock);
|
spin_unlock(&b->irq_lock);
|
||||||
|
b = &READ_ONCE(rq->engine)->breadcrumbs;
|
||||||
|
spin_lock(&b->irq_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now that we are finally serialised with request submit/unsubmit,
|
||||||
|
* [with b->irq_lock] and with i915_request_retire() [via checking
|
||||||
|
* SIGNALED with rq->lock] confirm the request is indeed active. If
|
||||||
|
* it is no longer active, the breadcrumb will be attached upon
|
||||||
|
* i915_request_submit().
|
||||||
|
*/
|
||||||
|
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
|
||||||
|
insert_breadcrumb(rq, b);
|
||||||
|
|
||||||
|
spin_unlock(&b->irq_lock);
|
||||||
|
|
||||||
return !__request_completed(rq);
|
return !__request_completed(rq);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -368,8 +408,6 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
|
|||||||
{
|
{
|
||||||
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
|
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
|
||||||
|
|
||||||
lockdep_assert_held(&rq->lock);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We must wait for b->irq_lock so that we know the interrupt handler
|
* We must wait for b->irq_lock so that we know the interrupt handler
|
||||||
* has released its reference to the intel_context and has completed
|
* has released its reference to the intel_context and has completed
|
||||||
|
@ -1148,20 +1148,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
|
|||||||
} else {
|
} else {
|
||||||
struct intel_engine_cs *owner = rq->context->engine;
|
struct intel_engine_cs *owner = rq->context->engine;
|
||||||
|
|
||||||
/*
|
|
||||||
* Decouple the virtual breadcrumb before moving it
|
|
||||||
* back to the virtual engine -- we don't want the
|
|
||||||
* request to complete in the background and try
|
|
||||||
* and cancel the breadcrumb on the virtual engine
|
|
||||||
* (instead of the old engine where it is linked)!
|
|
||||||
*/
|
|
||||||
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
|
|
||||||
&rq->fence.flags)) {
|
|
||||||
spin_lock_nested(&rq->lock,
|
|
||||||
SINGLE_DEPTH_NESTING);
|
|
||||||
i915_request_cancel_breadcrumb(rq);
|
|
||||||
spin_unlock(&rq->lock);
|
|
||||||
}
|
|
||||||
WRITE_ONCE(rq->engine, owner);
|
WRITE_ONCE(rq->engine, owner);
|
||||||
owner->submit_request(rq);
|
owner->submit_request(rq);
|
||||||
active = NULL;
|
active = NULL;
|
||||||
|
@ -301,14 +301,24 @@ bool i915_request_retire(struct i915_request *rq)
|
|||||||
dma_fence_signal_locked(&rq->fence);
|
dma_fence_signal_locked(&rq->fence);
|
||||||
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
|
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
|
||||||
i915_request_cancel_breadcrumb(rq);
|
i915_request_cancel_breadcrumb(rq);
|
||||||
|
spin_unlock_irq(&rq->lock);
|
||||||
|
|
||||||
if (i915_request_has_waitboost(rq)) {
|
if (i915_request_has_waitboost(rq)) {
|
||||||
GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
|
GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
|
||||||
atomic_dec(&rq->engine->gt->rps.num_waiters);
|
atomic_dec(&rq->engine->gt->rps.num_waiters);
|
||||||
}
|
}
|
||||||
if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
|
|
||||||
set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
|
/*
|
||||||
__notify_execute_cb(rq);
|
* We only loosely track inflight requests across preemption,
|
||||||
}
|
* and so we may find ourselves attempting to retire a _completed_
|
||||||
|
* request that we have removed from the HW and put back on a run
|
||||||
|
* queue.
|
||||||
|
*
|
||||||
|
* As we set I915_FENCE_FLAG_ACTIVE on the request, this should be
|
||||||
|
* after removing the breadcrumb and signaling it, so that we do not
|
||||||
|
* inadvertently attach the breadcrumb to a completed request.
|
||||||
|
*/
|
||||||
|
remove_from_engine(rq);
|
||||||
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
|
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
|
||||||
spin_unlock_irq(&rq->lock);
|
spin_unlock_irq(&rq->lock);
|
||||||
|
|
||||||
@ -547,19 +557,21 @@ xfer:
|
|||||||
clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
|
clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We may be recursing from the signal callback of another i915 fence */
|
/*
|
||||||
if (!i915_request_signaled(request)) {
|
* XXX Rollback bonded-execution on __i915_request_unsubmit()?
|
||||||
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
|
*
|
||||||
|
* In the future, perhaps when we have an active time-slicing scheduler,
|
||||||
|
* it will be interesting to unsubmit parallel execution and remove
|
||||||
|
* busywaits from the GPU until their master is restarted. This is
|
||||||
|
* quite hairy, we have to carefully rollback the fence and do a
|
||||||
|
* preempt-to-idle cycle on the target engine, all the while the
|
||||||
|
* master execute_cb may refire.
|
||||||
|
*/
|
||||||
|
__notify_execute_cb(request);
|
||||||
|
|
||||||
__notify_execute_cb(request);
|
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
|
||||||
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
|
!i915_request_enable_breadcrumb(request))
|
||||||
&request->fence.flags) &&
|
intel_engine_signal_breadcrumbs(engine);
|
||||||
!i915_request_enable_breadcrumb(request))
|
|
||||||
intel_engine_signal_breadcrumbs(engine);
|
|
||||||
|
|
||||||
spin_unlock(&request->lock);
|
|
||||||
GEM_BUG_ON(!llist_empty(&request->execute_cb));
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -581,27 +593,27 @@ void __i915_request_unsubmit(struct i915_request *request)
|
|||||||
{
|
{
|
||||||
struct intel_engine_cs *engine = request->engine;
|
struct intel_engine_cs *engine = request->engine;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Only unwind in reverse order, required so that the per-context list
|
||||||
|
* is kept in seqno/ring order.
|
||||||
|
*/
|
||||||
RQ_TRACE(request, "\n");
|
RQ_TRACE(request, "\n");
|
||||||
|
|
||||||
GEM_BUG_ON(!irqs_disabled());
|
GEM_BUG_ON(!irqs_disabled());
|
||||||
lockdep_assert_held(&engine->active.lock);
|
lockdep_assert_held(&engine->active.lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Only unwind in reverse order, required so that the per-context list
|
* Before we remove this breadcrumb from the signal list, we have
|
||||||
* is kept in seqno/ring order.
|
* to ensure that a concurrent dma_fence_enable_signaling() does not
|
||||||
|
* attach itself. We first mark the request as no longer active and
|
||||||
|
* make sure that is visible to other cores, and then remove the
|
||||||
|
* breadcrumb if attached.
|
||||||
*/
|
*/
|
||||||
|
GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
|
||||||
/* We may be recursing from the signal callback of another i915 fence */
|
clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
|
||||||
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
|
|
||||||
|
|
||||||
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
|
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
|
||||||
i915_request_cancel_breadcrumb(request);
|
i915_request_cancel_breadcrumb(request);
|
||||||
|
|
||||||
GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
|
|
||||||
clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
|
|
||||||
|
|
||||||
spin_unlock(&request->lock);
|
|
||||||
|
|
||||||
/* We've already spun, don't charge on resubmitting. */
|
/* We've already spun, don't charge on resubmitting. */
|
||||||
if (request->sched.semaphores && i915_request_started(request))
|
if (request->sched.semaphores && i915_request_started(request))
|
||||||
request->sched.semaphores = 0;
|
request->sched.semaphores = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user