4797948071
Track the latest fence waited upon on each context, and only add a new asynchronous wait if the new fence is more recent than the recorded fence for that context. This requires us to filter out unordered timelines, which are noted by DMA_FENCE_NO_CONTEXT. However, in the absence of a universal identifier, we have to use our own i915->mm.unordered_timeline token. v2: Throw around the debug crutches v3: Inline the likely case of the pre-allocation cache being full. v4: Drop the pre-allocation support, we can lose the most recent fence in case of allocation failure -- it just means we may emit more awaits than strictly necessary but will not break. v5: Trim allocation size for leaf nodes, they only need an array of u32 not pointers. v6: Create mock_timeline to tidy selftest writing v7: s/intel_timeline_sync_get/intel_timeline_sync_is_later/ (Tvrtko) v8: Prune the stale sync points when we idle. v9: Include a small benchmark in the kselftests v10: Separate the idr implementation into its own compartment. (Tvrtko) v11: Refactor igt_sync kselftests to avoid deep nesting (Tvrtko) v12: __sync_leaf_idx() to assert that p->height is 0 when checking leaves v13: kselftests to investigate struct i915_syncmap itself (Tvrtko) v14: Foray into ascii art graphs v15: Take into account that the random lookup/insert does 2 prng calls, not 1, when benchmarking, and use for_each_set_bit() (Tvrtko) v16: Improved ascii art Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170503093924.5320-4-chris@chris-wilson.co.uk
158 lines
4.8 KiB
C
158 lines
4.8 KiB
C
/*
|
|
* Copyright © 2016 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*
|
|
*/
|
|
|
|
#include "i915_drv.h"
|
|
#include "i915_syncmap.h"
|
|
|
|
static void __intel_timeline_init(struct intel_timeline *tl,
|
|
struct i915_gem_timeline *parent,
|
|
u64 context,
|
|
struct lock_class_key *lockclass,
|
|
const char *lockname)
|
|
{
|
|
tl->fence_context = context;
|
|
tl->common = parent;
|
|
#ifdef CONFIG_DEBUG_SPINLOCK
|
|
__raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
|
|
#else
|
|
spin_lock_init(&tl->lock);
|
|
#endif
|
|
init_request_active(&tl->last_request, NULL);
|
|
INIT_LIST_HEAD(&tl->requests);
|
|
i915_syncmap_init(&tl->sync);
|
|
}
|
|
|
|
static void __intel_timeline_fini(struct intel_timeline *tl)
|
|
{
|
|
GEM_BUG_ON(!list_empty(&tl->requests));
|
|
|
|
i915_syncmap_free(&tl->sync);
|
|
}
|
|
|
|
static int __i915_gem_timeline_init(struct drm_i915_private *i915,
|
|
struct i915_gem_timeline *timeline,
|
|
const char *name,
|
|
struct lock_class_key *lockclass,
|
|
const char *lockname)
|
|
{
|
|
unsigned int i;
|
|
u64 fences;
|
|
|
|
lockdep_assert_held(&i915->drm.struct_mutex);
|
|
|
|
/*
|
|
* Ideally we want a set of engines on a single leaf as we expect
|
|
* to mostly be tracking synchronisation between engines. It is not
|
|
* a huge issue if this is not the case, but we may want to mitigate
|
|
* any page crossing penalties if they become an issue.
|
|
*/
|
|
BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
|
|
|
|
timeline->i915 = i915;
|
|
timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL);
|
|
if (!timeline->name)
|
|
return -ENOMEM;
|
|
|
|
list_add(&timeline->link, &i915->gt.timelines);
|
|
|
|
/* Called during early_init before we know how many engines there are */
|
|
fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine));
|
|
for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
|
|
__intel_timeline_init(&timeline->engine[i],
|
|
timeline, fences++,
|
|
lockclass, lockname);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int i915_gem_timeline_init(struct drm_i915_private *i915,
|
|
struct i915_gem_timeline *timeline,
|
|
const char *name)
|
|
{
|
|
static struct lock_class_key class;
|
|
|
|
return __i915_gem_timeline_init(i915, timeline, name,
|
|
&class, "&timeline->lock");
|
|
}
|
|
|
|
int i915_gem_timeline_init__global(struct drm_i915_private *i915)
|
|
{
|
|
static struct lock_class_key class;
|
|
|
|
return __i915_gem_timeline_init(i915,
|
|
&i915->gt.global_timeline,
|
|
"[execution]",
|
|
&class, "&global_timeline->lock");
|
|
}
|
|
|
|
/**
|
|
* i915_gem_timelines_mark_idle -- called when the driver idles
|
|
* @i915 - the drm_i915_private device
|
|
*
|
|
* When the driver is completely idle, we know that all of our sync points
|
|
* have been signaled and our tracking is then entirely redundant. Any request
|
|
* to wait upon an older sync point will be completed instantly as we know
|
|
* the fence is signaled and therefore we will not even look them up in the
|
|
* sync point map.
|
|
*/
|
|
void i915_gem_timelines_mark_idle(struct drm_i915_private *i915)
|
|
{
|
|
struct i915_gem_timeline *timeline;
|
|
int i;
|
|
|
|
lockdep_assert_held(&i915->drm.struct_mutex);
|
|
|
|
list_for_each_entry(timeline, &i915->gt.timelines, link) {
|
|
for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
|
|
struct intel_timeline *tl = &timeline->engine[i];
|
|
|
|
/*
|
|
* All known fences are completed so we can scrap
|
|
* the current sync point tracking and start afresh,
|
|
* any attempt to wait upon a previous sync point
|
|
* will be skipped as the fence was signaled.
|
|
*/
|
|
i915_syncmap_free(&tl->sync);
|
|
}
|
|
}
|
|
}
|
|
|
|
void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
|
|
{
|
|
int i;
|
|
|
|
lockdep_assert_held(&timeline->i915->drm.struct_mutex);
|
|
|
|
for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
|
|
__intel_timeline_fini(&timeline->engine[i]);
|
|
|
|
list_del(&timeline->link);
|
|
kfree(timeline->name);
|
|
}
|
|
|
|
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
|
#include "selftests/mock_timeline.c"
|
|
#include "selftests/i915_gem_timeline.c"
|
|
#endif
|