forked from Minki/linux
4797948071
Track the latest fence waited upon on each context, and only add a new asynchronous wait if the new fence is more recent than the recorded fence for that context. This requires us to filter out unordered timelines, which are noted by DMA_FENCE_NO_CONTEXT. However, in the absence of a universal identifier, we have to use our own i915->mm.unordered_timeline token. v2: Throw around the debug crutches v3: Inline the likely case of the pre-allocation cache being full. v4: Drop the pre-allocation support, we can lose the most recent fence in case of allocation failure -- it just means we may emit more awaits than strictly necessary but will not break. v5: Trim allocation size for leaf nodes, they only need an array of u32 not pointers. v6: Create mock_timeline to tidy selftest writing v7: s/intel_timeline_sync_get/intel_timeline_sync_is_later/ (Tvrtko) v8: Prune the stale sync points when we idle. v9: Include a small benchmark in the kselftests v10: Separate the idr implementation into its own compartment. (Tvrtko) v11: Refactor igt_sync kselftests to avoid deep nesting (Tvrtko) v12: __sync_leaf_idx() to assert that p->height is 0 when checking leaves v13: kselftests to investigate struct i915_syncmap itself (Tvrtko) v14: Foray into ascii art graphs v15: Take into account that the random lookup/insert does 2 prng calls, not 1, when benchmarking, and use for_each_set_bit() (Tvrtko) v16: Improved ascii art Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170503093924.5320-4-chris@chris-wilson.co.uk
117 lines
3.7 KiB
C
117 lines
3.7 KiB
C
/*
|
|
* Copyright © 2016 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*
|
|
*/
|
|
|
|
#ifndef I915_GEM_TIMELINE_H
|
|
#define I915_GEM_TIMELINE_H
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include "i915_utils.h"
|
|
#include "i915_gem_request.h"
|
|
#include "i915_syncmap.h"
|
|
|
|
struct i915_gem_timeline;
|
|
|
|
struct intel_timeline {
|
|
u64 fence_context;
|
|
u32 seqno;
|
|
|
|
/**
|
|
* Count of outstanding requests, from the time they are constructed
|
|
* to the moment they are retired. Loosely coupled to hardware.
|
|
*/
|
|
u32 inflight_seqnos;
|
|
|
|
spinlock_t lock;
|
|
|
|
/**
|
|
* List of breadcrumbs associated with GPU requests currently
|
|
* outstanding.
|
|
*/
|
|
struct list_head requests;
|
|
|
|
/* Contains an RCU guarded pointer to the last request. No reference is
|
|
* held to the request, users must carefully acquire a reference to
|
|
* the request using i915_gem_active_get_request_rcu(), or hold the
|
|
* struct_mutex.
|
|
*/
|
|
struct i915_gem_active last_request;
|
|
|
|
/**
|
|
* We track the most recent seqno that we wait on in every context so
|
|
* that we only have to emit a new await and dependency on a more
|
|
* recent sync point. As the contexts may be executed out-of-order, we
|
|
* have to track each individually and can not rely on an absolute
|
|
* global_seqno. When we know that all tracked fences are completed
|
|
* (i.e. when the driver is idle), we know that the syncmap is
|
|
* redundant and we can discard it without loss of generality.
|
|
*/
|
|
struct i915_syncmap *sync;
|
|
u32 sync_seqno[I915_NUM_ENGINES];
|
|
|
|
struct i915_gem_timeline *common;
|
|
};
|
|
|
|
struct i915_gem_timeline {
|
|
struct list_head link;
|
|
|
|
struct drm_i915_private *i915;
|
|
const char *name;
|
|
|
|
struct intel_timeline engine[I915_NUM_ENGINES];
|
|
};
|
|
|
|
/* Create a new timeline set @tl named @name on @i915. */
int i915_gem_timeline_init(struct drm_i915_private *i915,
			   struct i915_gem_timeline *tl,
			   const char *name);
/* Create the special device-global timeline. */
int i915_gem_timeline_init__global(struct drm_i915_private *i915);
/* Called when the GPU is idle; allows stale sync state to be pruned. */
void i915_gem_timelines_mark_idle(struct drm_i915_private *i915);
/* Tear down a timeline set previously set up by i915_gem_timeline_init(). */
void i915_gem_timeline_fini(struct i915_gem_timeline *tl);
|
|
|
|
static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
|
|
u64 context, u32 seqno)
|
|
{
|
|
return i915_syncmap_set(&tl->sync, context, seqno);
|
|
}
|
|
|
|
static inline int intel_timeline_sync_set(struct intel_timeline *tl,
|
|
const struct dma_fence *fence)
|
|
{
|
|
return __intel_timeline_sync_set(tl, fence->context, fence->seqno);
|
|
}
|
|
|
|
static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl,
|
|
u64 context, u32 seqno)
|
|
{
|
|
return i915_syncmap_is_later(&tl->sync, context, seqno);
|
|
}
|
|
|
|
static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
|
|
const struct dma_fence *fence)
|
|
{
|
|
return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
|
|
}
|
|
|
|
#endif
|