Merge tag 'drm-intel-gt-next-2021-10-21' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

UAPI Changes:

- Expose multi-LRC submission interface

  Similar to the bonded submission interface but simplified.
  Comes with GuC only implementation for now. See kerneldoc
  for more details.

  Userspace changes: https://github.com/intel/media-driver/pull/1252

- Expose logical engine instance to user

  Needed by the multi-LRC submission interface for GuC.

  Userspace changes: https://github.com/intel/media-driver/pull/1252

Driver Changes:

- Fix blank screen booting crashes when CONFIG_CC_OPTIMIZE_FOR_SIZE=y (Hugh)
- Add support for multi-LRC submission in the GuC backend (Matt B)
- Add extra cache flushing before making pages userspace visible (Matt A, Thomas)
- Mark internal GPU object pages dirty so they will be flushed properly (Matt A)
- Move remaining debugfs interfaces i915_wedged/i915_forcewake_user into gt (Andi)
- Replace the unconditional clflushes with drm_clflush_virt_range() (Ville)
- Remove IS_ACTIVE macro completely (Lucas)
- Improve kerneldocs for cache_dirty (Matt A)
- Add missing includes (Lucas)
- Selftest improvements (Matt R, Ran, Matt A)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YXFmLKoq8Fg9JxSd@jlahtine-mobl.ger.corp.intel.com
commit 6f2f7c8330
Changed paths:

Documentation/gpu/rfc/
drivers/gpu/drm/i915/gem/
    i915_gem_busy.c, i915_gem_context.c, i915_gem_context_types.h,
    i915_gem_dmabuf.c, i915_gem_execbuffer.c, i915_gem_internal.c,
    i915_gem_object.c, i915_gem_object.h, i915_gem_object_types.h,
    i915_gem_shmem.c, i915_gem_userptr.c
drivers/gpu/drm/i915/gem/selftests/
drivers/gpu/drm/i915/gt/
    intel_context.c, intel_context.h, intel_context_types.h, intel_engine.h,
    intel_engine_cs.c, intel_engine_pm.c, intel_engine_pm.h,
    intel_engine_types.h, intel_execlists_submission.c, intel_gt_debugfs.c,
    intel_gt_debugfs.h, intel_gt_pm.h, intel_gt_pm_debugfs.c,
    intel_gt_pm_debugfs.h, intel_llc.c, intel_lrc.c, intel_ring_submission.c,
    intel_timeline.c, selftest_execlists.c
drivers/gpu/drm/i915/gt/uc/
drivers/gpu/drm/i915/
    i915_debugfs.c, i915_query.c, i915_request.c, i915_request.h,
    i915_vma.c, i915_vma.h, intel_wakeref.h
drivers/gpu/drm/i915/selftests/
include/uapi/drm/
@@ -1,122 +0,0 @@
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2021 Intel Corporation
 */

#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */

/**
 * struct drm_i915_context_engines_parallel_submit - Configure engine for
 * parallel submission.
 *
 * Setup a slot in the context engine map to allow multiple BBs to be submitted
 * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
 * in parallel. Multiple hardware contexts are created internally in the i915 to
 * run these BBs. Once a slot is configured for N BBs only N BBs can be
 * submitted in each execbuf IOCTL and this is implicit behavior, e.g. the user
 * doesn't tell the execbuf IOCTL there are N BBs; the execbuf IOCTL knows how
 * many BBs there are based on the slot's configuration. The N BBs are the last
 * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
 *
 * The default placement behavior is to create implicit bonds between each
 * context if each context maps to more than 1 physical engine (e.g. context is
 * a virtual engine). Also we only allow contexts of same engine class and these
 * contexts must be in logically contiguous order. Examples of the placement
 * behavior are described below. Lastly, the default is to not allow BBs to be
 * preempted mid-BB; rather, coordinated preemption is inserted on all hardware
 * contexts between each set of BBs. Flags may be added in the future to change
 * both of these default behaviors.
 *
 * Returns -EINVAL if hardware context placement configuration is invalid or if
 * the placement configuration isn't supported on the platform / submission
 * interface.
 * Returns -ENODEV if extension isn't supported on the platform / submission
 * interface.
 *
 * .. code-block:: none
 *
 *	Example 1 pseudo code:
 *	CS[X] = generic engine of same class, logical instance X
 *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
 *	set_engines(INVALID)
 *	set_parallel(engine_index=0, width=2, num_siblings=1,
 *		     engines=CS[0],CS[1])
 *
 *	Results in the following valid placement:
 *	CS[0], CS[1]
 *
 *	Example 2 pseudo code:
 *	CS[X] = generic engine of same class, logical instance X
 *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
 *	set_engines(INVALID)
 *	set_parallel(engine_index=0, width=2, num_siblings=2,
 *		     engines=CS[0],CS[2],CS[1],CS[3])
 *
 *	Results in the following valid placements:
 *	CS[0], CS[1]
 *	CS[2], CS[3]
 *
 *	This can also be thought of as 2 virtual engines described by a 2-D array
 *	in the engines field, with bonds placed between each index of the
 *	virtual engines, e.g. CS[0] is bonded to CS[1] and CS[2] is bonded to
 *	CS[3].
 *	VE[0] = CS[0], CS[2]
 *	VE[1] = CS[1], CS[3]
 *
 *	Example 3 pseudo code:
 *	CS[X] = generic engine of same class, logical instance X
 *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
 *	set_engines(INVALID)
 *	set_parallel(engine_index=0, width=2, num_siblings=2,
 *		     engines=CS[0],CS[1],CS[1],CS[3])
 *
 *	Results in the following valid and invalid placements:
 *	CS[0], CS[1]
 *	CS[1], CS[3] - Not logically contiguous, returns -EINVAL
 */
struct drm_i915_context_engines_parallel_submit {
	/**
	 * @base: base user extension.
	 */
	struct i915_user_extension base;

	/**
	 * @engine_index: slot for parallel engine
	 */
	__u16 engine_index;

	/**
	 * @width: number of contexts per parallel engine
	 */
	__u16 width;

	/**
	 * @num_siblings: number of siblings per context
	 */
	__u16 num_siblings;

	/**
	 * @mbz16: reserved for future use; must be zero
	 */
	__u16 mbz16;

	/**
	 * @flags: all undefined flags must be zero; currently no flags are defined
	 */
	__u64 flags;

	/**
	 * @mbz64: reserved for future use; must be zero
	 */
	__u64 mbz64[3];

	/**
	 * @engines: 2-d array of engine instances to configure parallel engine
	 *
	 * length = width (i) * num_siblings (j)
	 * index = j + i * num_siblings
	 */
	struct i915_engine_class_instance engines[0];

} __packed;
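A minimal userspace sketch of how the extension above might be filled in for
Example 1 (one parallel slot, width=2, one sibling per context). The wrapper
struct names, the choice of video engines and the surrounding context-creation
plumbing are illustrative assumptions, not defined by this patch; real code
would discover logical engine instances through the engine-info query IOCTL
and chain this into I915_CONTEXT_PARAM_ENGINES at context creation:

#include <stdint.h>
#include <string.h>
#include <drm/i915_drm.h>

/* Hypothetical wrappers giving the trailing engines[] arrays real storage. */
struct parallel_ext_2x1 {
	struct i915_context_engines_parallel_submit base;
	struct i915_engine_class_instance engines[2]; /* width * num_siblings */
} __attribute__((packed));

struct engines_param_1slot {
	struct i915_context_param_engines base;
	struct i915_engine_class_instance engines[1];
} __attribute__((packed));

static void setup_parallel_slot(struct engines_param_1slot *set,
				struct parallel_ext_2x1 *parallel)
{
	memset(parallel, 0, sizeof(*parallel));
	parallel->base.base.name = I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT;
	parallel->base.engine_index = 0;	/* slot in the engine map */
	parallel->base.width = 2;		/* 2 BBs per execbuf */
	parallel->base.num_siblings = 1;	/* 1 placement per context */
	/* engines[] is indexed as j + i * num_siblings */
	parallel->engines[0].engine_class = I915_ENGINE_CLASS_VIDEO;
	parallel->engines[0].engine_instance = 0;	/* CS[0] */
	parallel->engines[1].engine_class = I915_ENGINE_CLASS_VIDEO;
	parallel->engines[1].engine_instance = 1;	/* CS[1] */

	/* set_engines(INVALID): the slot starts out invalid ... */
	memset(set, 0, sizeof(*set));
	set->engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
	set->engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
	/* ... and is populated by the chained parallel-submit extension. */
	set->base.extensions = (__u64)(uintptr_t)&parallel->base;
}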
@@ -135,8 +135,8 @@ Add I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT and
drm_i915_context_engines_parallel_submit to the uAPI to implement this
extension.

.. kernel-doc:: Documentation/gpu/rfc/i915_parallel_execbuf.h
        :functions: drm_i915_context_engines_parallel_submit
.. kernel-doc:: include/uapi/drm/i915_drm.h
        :functions: i915_context_engines_parallel_submit

Extend execbuf2 IOCTL to support submitting N BBs in a single IOCTL
-------------------------------------------------------------------
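As a rough sketch of the execbuf2 side named by this heading, a width=2
submission against the slot configured above might look like the following;
the buffer handles, context id and file descriptor are placeholders and the
engine-selection detail is an assumption, not taken verbatim from this series:

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/*
 * Illustrative only: submit 2 BBs in a single execbuf2 IOCTL on a context
 * whose engine-map slot 0 was configured for parallel submission with
 * width=2. Without I915_EXEC_BATCH_FIRST, the last <width> objects in the
 * buffer list are taken as the batch buffers. Error handling is omitted.
 */
static int submit_parallel(int fd, __u32 ctx_id,
			   __u32 data_bo, __u32 batch_bo0, __u32 batch_bo1)
{
	struct drm_i915_gem_exec_object2 objs[3] = {
		{ .handle = data_bo },
		{ .handle = batch_bo0 },	/* BB for hardware context 0 */
		{ .handle = batch_bo1 },	/* BB for hardware context 1 */
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = (__u64)(uintptr_t)objs,
		.buffer_count = 3,
		.flags = 0,			/* engine-map slot 0 */
	};

	i915_execbuffer2_set_context_id(execbuf, ctx_id);

	return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}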
@ -4,6 +4,8 @@
|
||||
* Copyright © 2014-2016 Intel Corporation
|
||||
*/
|
||||
|
||||
#include <linux/dma-fence-array.h>
|
||||
|
||||
#include "gt/intel_engine.h"
|
||||
|
||||
#include "i915_gem_ioctls.h"
|
||||
@ -36,7 +38,7 @@ static __always_inline u32 __busy_write_id(u16 id)
|
||||
}
|
||||
|
||||
static __always_inline unsigned int
|
||||
__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
|
||||
__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id))
|
||||
{
|
||||
const struct i915_request *rq;
|
||||
|
||||
@ -46,29 +48,60 @@ __busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u16 id))
|
||||
* to eventually flush us, but to minimise latency just ask the
|
||||
* hardware.
|
||||
*
|
||||
* Note we only report on the status of native fences.
|
||||
* Note we only report on the status of native fences and we currently
|
||||
* have two native fences:
|
||||
*
|
||||
* 1. A composite fence (dma_fence_array) constructed of i915 requests
|
||||
* created during a parallel submission. In this case we deconstruct the
|
||||
* composite fence into individual i915 requests and check the status of
|
||||
* each request.
|
||||
*
|
||||
* 2. A single i915 request.
|
||||
*/
|
||||
if (!dma_fence_is_i915(fence))
|
||||
return 0;
|
||||
if (dma_fence_is_array(fence)) {
|
||||
struct dma_fence_array *array = to_dma_fence_array(fence);
|
||||
struct dma_fence **child = array->fences;
|
||||
unsigned int nchild = array->num_fences;
|
||||
|
||||
/* opencode to_request() in order to avoid const warnings */
|
||||
rq = container_of(fence, const struct i915_request, fence);
|
||||
if (i915_request_completed(rq))
|
||||
return 0;
|
||||
do {
|
||||
struct dma_fence *current_fence = *child++;
|
||||
|
||||
/* Beware type-expansion follies! */
|
||||
BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
|
||||
return flag(rq->engine->uabi_class);
|
||||
/* Not an i915 fence, can't be busy per above */
|
||||
if (!dma_fence_is_i915(current_fence) ||
|
||||
!test_bit(I915_FENCE_FLAG_COMPOSITE,
|
||||
&current_fence->flags)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
rq = to_request(current_fence);
|
||||
if (!i915_request_completed(rq))
|
||||
return flag(rq->engine->uabi_class);
|
||||
} while (--nchild);
|
||||
|
||||
/* All requests in array complete, not busy */
|
||||
return 0;
|
||||
} else {
|
||||
if (!dma_fence_is_i915(fence))
|
||||
return 0;
|
||||
|
||||
rq = to_request(fence);
|
||||
if (i915_request_completed(rq))
|
||||
return 0;
|
||||
|
||||
/* Beware type-expansion follies! */
|
||||
BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
|
||||
return flag(rq->engine->uabi_class);
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline unsigned int
|
||||
busy_check_reader(const struct dma_fence *fence)
|
||||
busy_check_reader(struct dma_fence *fence)
|
||||
{
|
||||
return __busy_set_if_active(fence, __busy_read_flag);
|
||||
}
|
||||
|
||||
static __always_inline unsigned int
|
||||
busy_check_writer(const struct dma_fence *fence)
|
||||
busy_check_writer(struct dma_fence *fence)
|
||||
{
|
||||
if (!fence)
|
||||
return 0;
|
||||
|
@ -556,9 +556,147 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
|
||||
void *data)
|
||||
{
|
||||
struct i915_context_engines_parallel_submit __user *ext =
|
||||
container_of_user(base, typeof(*ext), base);
|
||||
const struct set_proto_ctx_engines *set = data;
|
||||
struct drm_i915_private *i915 = set->i915;
|
||||
u64 flags;
|
||||
int err = 0, n, i, j;
|
||||
u16 slot, width, num_siblings;
|
||||
struct intel_engine_cs **siblings = NULL;
|
||||
intel_engine_mask_t prev_mask;
|
||||
|
||||
/* FIXME: This is NIY for execlists */
|
||||
if (!(intel_uc_uses_guc_submission(&i915->gt.uc)))
|
||||
return -ENODEV;
|
||||
|
||||
if (get_user(slot, &ext->engine_index))
|
||||
return -EFAULT;
|
||||
|
||||
if (get_user(width, &ext->width))
|
||||
return -EFAULT;
|
||||
|
||||
if (get_user(num_siblings, &ext->num_siblings))
|
||||
return -EFAULT;
|
||||
|
||||
if (slot >= set->num_engines) {
|
||||
drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
|
||||
slot, set->num_engines);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) {
|
||||
drm_dbg(&i915->drm,
|
||||
"Invalid placement[%d], already occupied\n", slot);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (get_user(flags, &ext->flags))
|
||||
return -EFAULT;
|
||||
|
||||
if (flags) {
|
||||
drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
|
||||
err = check_user_mbz(&ext->mbz64[n]);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (width < 2) {
|
||||
drm_dbg(&i915->drm, "Width (%d) < 2\n", width);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (num_siblings < 1) {
|
||||
drm_dbg(&i915->drm, "Number siblings (%d) < 1\n",
|
||||
num_siblings);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
siblings = kmalloc_array(num_siblings * width,
|
||||
sizeof(*siblings),
|
||||
GFP_KERNEL);
|
||||
if (!siblings)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Create contexts / engines */
|
||||
for (i = 0; i < width; ++i) {
|
||||
intel_engine_mask_t current_mask = 0;
|
||||
struct i915_engine_class_instance prev_engine;
|
||||
|
||||
for (j = 0; j < num_siblings; ++j) {
|
||||
struct i915_engine_class_instance ci;
|
||||
|
||||
n = i * num_siblings + j;
|
||||
if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
|
||||
err = -EFAULT;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
siblings[n] =
|
||||
intel_engine_lookup_user(i915, ci.engine_class,
|
||||
ci.engine_instance);
|
||||
if (!siblings[n]) {
|
||||
drm_dbg(&i915->drm,
|
||||
"Invalid sibling[%d]: { class:%d, inst:%d }\n",
|
||||
n, ci.engine_class, ci.engine_instance);
|
||||
err = -EINVAL;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
if (n) {
|
||||
if (prev_engine.engine_class !=
|
||||
ci.engine_class) {
|
||||
drm_dbg(&i915->drm,
|
||||
"Mismatched class %d, %d\n",
|
||||
prev_engine.engine_class,
|
||||
ci.engine_class);
|
||||
err = -EINVAL;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
prev_engine = ci;
|
||||
current_mask |= siblings[n]->logical_mask;
|
||||
}
|
||||
|
||||
if (i > 0) {
|
||||
if (current_mask != prev_mask << 1) {
|
||||
drm_dbg(&i915->drm,
|
||||
"Non contiguous logical mask 0x%x, 0x%x\n",
|
||||
prev_mask, current_mask);
|
||||
err = -EINVAL;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
prev_mask = current_mask;
|
||||
}
|
||||
|
||||
set->engines[slot].type = I915_GEM_ENGINE_TYPE_PARALLEL;
|
||||
set->engines[slot].num_siblings = num_siblings;
|
||||
set->engines[slot].width = width;
|
||||
set->engines[slot].siblings = siblings;
|
||||
|
||||
return 0;
|
||||
|
||||
out_err:
|
||||
kfree(siblings);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = {
|
||||
[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance,
|
||||
[I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond,
|
||||
[I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT] =
|
||||
set_proto_ctx_engines_parallel_submit,
|
||||
};
|
||||
|
||||
static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv,
|
||||
@ -794,6 +932,7 @@ static int intel_context_set_gem(struct intel_context *ce,
|
||||
GEM_BUG_ON(rcu_access_pointer(ce->gem_context));
|
||||
RCU_INIT_POINTER(ce->gem_context, ctx);
|
||||
|
||||
GEM_BUG_ON(intel_context_is_pinned(ce));
|
||||
ce->ring_size = SZ_16K;
|
||||
|
||||
i915_vm_put(ce->vm);
|
||||
@ -818,6 +957,25 @@ static int intel_context_set_gem(struct intel_context *ce,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __unpin_engines(struct i915_gem_engines *e, unsigned int count)
|
||||
{
|
||||
while (count--) {
|
||||
struct intel_context *ce = e->engines[count], *child;
|
||||
|
||||
if (!ce || !test_bit(CONTEXT_PERMA_PIN, &ce->flags))
|
||||
continue;
|
||||
|
||||
for_each_child(ce, child)
|
||||
intel_context_unpin(child);
|
||||
intel_context_unpin(ce);
|
||||
}
|
||||
}
|
||||
|
||||
static void unpin_engines(struct i915_gem_engines *e)
|
||||
{
|
||||
__unpin_engines(e, e->num_engines);
|
||||
}
|
||||
|
||||
static void __free_engines(struct i915_gem_engines *e, unsigned int count)
|
||||
{
|
||||
while (count--) {
|
||||
@ -933,6 +1091,40 @@ free_engines:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int perma_pin_contexts(struct intel_context *ce)
|
||||
{
|
||||
struct intel_context *child;
|
||||
int i = 0, j = 0, ret;
|
||||
|
||||
GEM_BUG_ON(!intel_context_is_parent(ce));
|
||||
|
||||
ret = intel_context_pin(ce);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
for_each_child(ce, child) {
|
||||
ret = intel_context_pin(child);
|
||||
if (unlikely(ret))
|
||||
goto unwind;
|
||||
++i;
|
||||
}
|
||||
|
||||
set_bit(CONTEXT_PERMA_PIN, &ce->flags);
|
||||
|
||||
return 0;
|
||||
|
||||
unwind:
|
||||
intel_context_unpin(ce);
|
||||
for_each_child(ce, child) {
|
||||
if (j++ < i)
|
||||
intel_context_unpin(child);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
|
||||
unsigned int num_engines,
|
||||
struct i915_gem_proto_engine *pe)
|
||||
@ -946,7 +1138,7 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
|
||||
e->num_engines = num_engines;
|
||||
|
||||
for (n = 0; n < num_engines; n++) {
|
||||
struct intel_context *ce;
|
||||
struct intel_context *ce, *child;
|
||||
int ret;
|
||||
|
||||
switch (pe[n].type) {
|
||||
@ -956,7 +1148,13 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
|
||||
|
||||
case I915_GEM_ENGINE_TYPE_BALANCED:
|
||||
ce = intel_engine_create_virtual(pe[n].siblings,
|
||||
pe[n].num_siblings);
|
||||
pe[n].num_siblings, 0);
|
||||
break;
|
||||
|
||||
case I915_GEM_ENGINE_TYPE_PARALLEL:
|
||||
ce = intel_engine_create_parallel(pe[n].siblings,
|
||||
pe[n].num_siblings,
|
||||
pe[n].width);
|
||||
break;
|
||||
|
||||
case I915_GEM_ENGINE_TYPE_INVALID:
|
||||
@ -977,6 +1175,30 @@ static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
|
||||
err = ERR_PTR(ret);
|
||||
goto free_engines;
|
||||
}
|
||||
for_each_child(ce, child) {
|
||||
ret = intel_context_set_gem(child, ctx, pe->sseu);
|
||||
if (ret) {
|
||||
err = ERR_PTR(ret);
|
||||
goto free_engines;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: Must be done after calling intel_context_set_gem as that
|
||||
* function changes the ring size. The ring is allocated when
|
||||
* the context is pinned. If the ring size is changed after
|
||||
* allocation we have a mismatch of the ring size and will cause
|
||||
* the context to hang. Presumably with a bit of reordering we
|
||||
* could move the perma-pin step to the backend function
|
||||
* intel_engine_create_parallel.
|
||||
*/
|
||||
if (pe[n].type == I915_GEM_ENGINE_TYPE_PARALLEL) {
|
||||
ret = perma_pin_contexts(ce);
|
||||
if (ret) {
|
||||
err = ERR_PTR(ret);
|
||||
goto free_engines;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return e;
|
||||
@ -1219,6 +1441,7 @@ static void context_close(struct i915_gem_context *ctx)
|
||||
|
||||
/* Flush any concurrent set_engines() */
|
||||
mutex_lock(&ctx->engines_mutex);
|
||||
unpin_engines(__context_engines_static(ctx));
|
||||
engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1));
|
||||
i915_gem_context_set_closed(ctx);
|
||||
mutex_unlock(&ctx->engines_mutex);
|
||||
|
@ -78,13 +78,16 @@ enum i915_gem_engine_type {
|
||||
|
||||
/** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */
|
||||
I915_GEM_ENGINE_TYPE_BALANCED,
|
||||
|
||||
/** @I915_GEM_ENGINE_TYPE_PARALLEL: A parallel engine set */
|
||||
I915_GEM_ENGINE_TYPE_PARALLEL,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct i915_gem_proto_engine - prototype engine
|
||||
*
|
||||
* This struct describes an engine that a context may contain. Engines
|
||||
* have three types:
|
||||
* have four types:
|
||||
*
|
||||
* - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they
|
||||
* show up as a NULL in i915_gem_engines::engines[i] and any attempt to
|
||||
@ -97,6 +100,10 @@ enum i915_gem_engine_type {
|
||||
*
|
||||
* - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described
|
||||
* i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings.
|
||||
*
|
||||
* - I915_GEM_ENGINE_TYPE_PARALLEL: A parallel submission engine set, described
|
||||
* i915_gem_proto_engine::width, i915_gem_proto_engine::num_siblings, and
|
||||
* i915_gem_proto_engine::siblings.
|
||||
*/
|
||||
struct i915_gem_proto_engine {
|
||||
/** @type: Type of this engine */
|
||||
@ -105,10 +112,13 @@ struct i915_gem_proto_engine {
|
||||
/** @engine: Engine, for physical */
|
||||
struct intel_engine_cs *engine;
|
||||
|
||||
/** @num_siblings: Number of balanced siblings */
|
||||
/** @num_siblings: Number of balanced or parallel siblings */
|
||||
unsigned int num_siblings;
|
||||
|
||||
/** @siblings: Balanced siblings */
|
||||
/** @width: Width of each sibling */
|
||||
unsigned int width;
|
||||
|
||||
/** @siblings: Balanced siblings or num_siblings * width for parallel */
|
||||
struct intel_engine_cs **siblings;
|
||||
|
||||
/** @sseu: Client-set SSEU parameters */
|
||||
|
@ -232,6 +232,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
|
||||
|
||||
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct sg_table *pages;
|
||||
unsigned int sg_page_sizes;
|
||||
|
||||
@ -242,8 +243,11 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
|
||||
if (IS_ERR(pages))
|
||||
return PTR_ERR(pages);
|
||||
|
||||
sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
|
||||
/* XXX: consider doing a vmap flush or something */
|
||||
if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj))
|
||||
wbinvd_on_all_cpus();
|
||||
|
||||
sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
|
||||
__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
|
||||
|
||||
return 0;
|
||||
@ -301,7 +305,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
|
||||
}
|
||||
|
||||
drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
|
||||
i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class, 0);
|
||||
i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class,
|
||||
I915_BO_ALLOC_USER);
|
||||
obj->base.import_attach = attach;
|
||||
obj->base.resv = dma_buf->resv;
|
||||
|
||||
|
File diff suppressed because it is too large
@ -134,6 +134,8 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
|
||||
internal_free_pages(pages);
|
||||
|
||||
obj->mm.dirty = false;
|
||||
|
||||
__start_cpu_write(obj);
|
||||
}
|
||||
|
||||
static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
|
||||
|
@ -128,6 +128,32 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
|
||||
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
|
||||
}
|
||||
|
||||
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
|
||||
/*
|
||||
* This is purely from a security perspective, so we simply don't care
|
||||
* about non-userspace objects being able to bypass the LLC.
|
||||
*/
|
||||
if (!(obj->flags & I915_BO_ALLOC_USER))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
|
||||
* possible for userspace to bypass the GTT caching bits set by the
|
||||
* kernel, as per the given object cache_level. This is troublesome
|
||||
* since the heavy flush we apply when first gathering the pages is
|
||||
* skipped if the kernel thinks the object is coherent with the GPU. As
|
||||
* a result it might be possible to bypass the cache and read the
|
||||
* contents of the page directly, which could be stale data. If it's
|
||||
* just a case of userspace shooting themselves in the foot then so be
|
||||
* it, but since i915 takes the stance of always zeroing memory before
|
||||
* handing it to userspace, we need to prevent this.
|
||||
*/
|
||||
return IS_JSL_EHL(i915);
|
||||
}
|
||||
|
||||
static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = to_intel_bo(gem);
|
||||
|
@ -514,6 +514,7 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
|
||||
|
||||
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
|
||||
unsigned int cache_level);
|
||||
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
|
||||
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
|
||||
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
|
||||
|
||||
|
@ -427,6 +427,33 @@ struct drm_i915_gem_object {
|
||||
* can freely bypass the CPU cache when touching the pages with the GPU,
|
||||
* where the kernel is completely unaware. On such platform we need
|
||||
* apply the sledgehammer-on-acquire regardless of the @cache_coherent.
|
||||
*
|
||||
* Special care is taken on non-LLC platforms, to prevent potential
|
||||
* information leak. The driver currently ensures:
|
||||
*
|
||||
* 1. All userspace objects, by default, have @cache_level set as
|
||||
* I915_CACHE_NONE. The only exception is userptr objects, where we
|
||||
* instead force I915_CACHE_LLC, but we also don't allow userspace to
|
||||
* ever change the @cache_level for such objects. Another special case
|
||||
* is dma-buf, which doesn't rely on @cache_dirty, but there we
|
||||
* always do a forced flush when acquiring the pages, if there is a
|
||||
* chance that the pages can be read directly from main memory with
|
||||
* the GPU.
|
||||
*
|
||||
* 2. All I915_CACHE_NONE objects have @cache_dirty initially true.
|
||||
*
|
||||
* 3. All swapped-out objects(i.e shmem) have @cache_dirty set to
|
||||
* true.
|
||||
*
|
||||
* 4. The @cache_dirty is never freely reset before the initial
|
||||
* flush, even if userspace adjusts the @cache_level through the
|
||||
* i915_gem_set_caching_ioctl.
|
||||
*
|
||||
* 5. All @cache_dirty objects(including swapped-in) are initially
|
||||
* flushed with a synchronous call to drm_clflush_sg in
|
||||
* __i915_gem_object_set_pages. The @cache_dirty can be freely reset
|
||||
* at this point. All further asynchronous clfushes are never security
|
||||
* critical, i.e userspace is free to race against itself.
|
||||
*/
|
||||
unsigned int cache_dirty:1;
|
||||
|
||||
|
@ -182,22 +182,7 @@ rebuild_st:
|
||||
if (i915_gem_object_needs_bit17_swizzle(obj))
|
||||
i915_gem_object_do_bit_17_swizzle(obj, st);
|
||||
|
||||
/*
|
||||
* EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
|
||||
* possible for userspace to bypass the GTT caching bits set by the
|
||||
* kernel, as per the given object cache_level. This is troublesome
|
||||
* since the heavy flush we apply when first gathering the pages is
|
||||
* skipped if the kernel thinks the object is coherent with the GPU. As
|
||||
* a result it might be possible to bypass the cache and read the
|
||||
* contents of the page directly, which could be stale data. If it's
|
||||
* just a case of userspace shooting themselves in the foot then so be
|
||||
* it, but since i915 takes the stance of always zeroing memory before
|
||||
* handing it to userspace, we need to prevent this.
|
||||
*
|
||||
* By setting cache_dirty here we make the clflush in set_pages
|
||||
* unconditional on such platforms.
|
||||
*/
|
||||
if (IS_JSL_EHL(i915) && obj->flags & I915_BO_ALLOC_USER)
|
||||
if (i915_gem_object_can_bypass_llc(obj))
|
||||
obj->cache_dirty = true;
|
||||
|
||||
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
|
||||
@ -301,6 +286,8 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
|
||||
struct sg_table *pages,
|
||||
bool needs_clflush)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
|
||||
GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
|
||||
|
||||
if (obj->mm.madv == I915_MADV_DONTNEED)
|
||||
@ -312,6 +299,16 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
|
||||
drm_clflush_sg(pages);
|
||||
|
||||
__start_cpu_write(obj);
|
||||
/*
|
||||
* On non-LLC platforms, force the flush-on-acquire if this is ever
|
||||
* swapped-in. Our async flush path is not trust worthy enough yet(and
|
||||
* happens in the wrong order), and with some tricks it's conceivable
|
||||
* for userspace to change the cache-level to I915_CACHE_NONE after the
|
||||
* pages are swapped-in, and since execbuf binds the object before doing
|
||||
* the async flush, we have a race window.
|
||||
*/
|
||||
if (!HAS_LLC(i915))
|
||||
obj->cache_dirty = true;
|
||||
}
|
||||
|
||||
void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
|
||||
|
@ -165,8 +165,11 @@ alloc_table:
|
||||
goto err;
|
||||
}
|
||||
|
||||
sg_page_sizes = i915_sg_dma_sizes(st->sgl);
|
||||
WARN_ON_ONCE(!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE));
|
||||
if (i915_gem_object_can_bypass_llc(obj))
|
||||
obj->cache_dirty = true;
|
||||
|
||||
sg_page_sizes = i915_sg_dma_sizes(st->sgl);
|
||||
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
|
||||
|
||||
return 0;
|
||||
@ -546,7 +549,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
|
||||
return -ENOMEM;
|
||||
|
||||
drm_gem_private_object_init(dev, &obj->base, args->user_size);
|
||||
i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class, 0);
|
||||
i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class,
|
||||
I915_BO_ALLOC_USER);
|
||||
obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE;
|
||||
obj->read_domains = I915_GEM_DOMAIN_CPU;
|
||||
obj->write_domain = I915_GEM_DOMAIN_CPU;
|
||||
|
@ -136,6 +136,8 @@ static void put_huge_pages(struct drm_i915_gem_object *obj,
|
||||
huge_pages_free_pages(pages);
|
||||
|
||||
obj->mm.dirty = false;
|
||||
|
||||
__start_cpu_write(obj);
|
||||
}
|
||||
|
||||
static const struct drm_i915_gem_object_ops huge_page_ops = {
|
||||
@ -152,6 +154,7 @@ huge_pages_object(struct drm_i915_private *i915,
|
||||
{
|
||||
static struct lock_class_key lock_class;
|
||||
struct drm_i915_gem_object *obj;
|
||||
unsigned int cache_level;
|
||||
|
||||
GEM_BUG_ON(!size);
|
||||
GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));
|
||||
@ -173,7 +176,9 @@ huge_pages_object(struct drm_i915_private *i915,
|
||||
|
||||
obj->write_domain = I915_GEM_DOMAIN_CPU;
|
||||
obj->read_domains = I915_GEM_DOMAIN_CPU;
|
||||
obj->cache_level = I915_CACHE_NONE;
|
||||
|
||||
cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
|
||||
i915_gem_object_set_cache_coherency(obj, cache_level);
|
||||
|
||||
obj->mm.page_mask = page_mask;
|
||||
|
||||
|
@ -17,13 +17,20 @@
|
||||
#include "huge_gem_object.h"
|
||||
#include "mock_context.h"
|
||||
|
||||
enum client_tiling {
|
||||
CLIENT_TILING_LINEAR,
|
||||
CLIENT_TILING_X,
|
||||
CLIENT_TILING_Y,
|
||||
CLIENT_NUM_TILING_TYPES
|
||||
};
|
||||
|
||||
#define WIDTH 512
|
||||
#define HEIGHT 32
|
||||
|
||||
struct blit_buffer {
|
||||
struct i915_vma *vma;
|
||||
u32 start_val;
|
||||
u32 tiling;
|
||||
enum client_tiling tiling;
|
||||
};
|
||||
|
||||
struct tiled_blits {
|
||||
@ -53,9 +60,9 @@ static int prepare_blit(const struct tiled_blits *t,
|
||||
*cs++ = MI_LOAD_REGISTER_IMM(1);
|
||||
*cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
|
||||
cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
|
||||
if (src->tiling == I915_TILING_Y)
|
||||
if (src->tiling == CLIENT_TILING_Y)
|
||||
cmd |= BCS_SRC_Y;
|
||||
if (dst->tiling == I915_TILING_Y)
|
||||
if (dst->tiling == CLIENT_TILING_Y)
|
||||
cmd |= BCS_DST_Y;
|
||||
*cs++ = cmd;
|
||||
|
||||
@ -172,7 +179,7 @@ static int tiled_blits_create_buffers(struct tiled_blits *t,
|
||||
|
||||
t->buffers[i].vma = vma;
|
||||
t->buffers[i].tiling =
|
||||
i915_prandom_u32_max_state(I915_TILING_Y + 1, prng);
|
||||
i915_prandom_u32_max_state(CLIENT_TILING_Y + 1, prng);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -197,17 +204,17 @@ static u64 swizzle_bit(unsigned int bit, u64 offset)
|
||||
static u64 tiled_offset(const struct intel_gt *gt,
|
||||
u64 v,
|
||||
unsigned int stride,
|
||||
unsigned int tiling)
|
||||
enum client_tiling tiling)
|
||||
{
|
||||
unsigned int swizzle;
|
||||
u64 x, y;
|
||||
|
||||
if (tiling == I915_TILING_NONE)
|
||||
if (tiling == CLIENT_TILING_LINEAR)
|
||||
return v;
|
||||
|
||||
y = div64_u64_rem(v, stride, &x);
|
||||
|
||||
if (tiling == I915_TILING_X) {
|
||||
if (tiling == CLIENT_TILING_X) {
|
||||
v = div64_u64_rem(y, 8, &y) * stride * 8;
|
||||
v += y * 512;
|
||||
v += div64_u64_rem(x, 512, &x) << 12;
|
||||
@ -244,12 +251,12 @@ static u64 tiled_offset(const struct intel_gt *gt,
|
||||
return v;
|
||||
}
|
||||
|
||||
static const char *repr_tiling(int tiling)
|
||||
static const char *repr_tiling(enum client_tiling tiling)
|
||||
{
|
||||
switch (tiling) {
|
||||
case I915_TILING_NONE: return "linear";
|
||||
case I915_TILING_X: return "X";
|
||||
case I915_TILING_Y: return "Y";
|
||||
case CLIENT_TILING_LINEAR: return "linear";
|
||||
case CLIENT_TILING_X: return "X";
|
||||
case CLIENT_TILING_Y: return "Y";
|
||||
default: return "unknown";
|
||||
}
|
||||
}
|
||||
|
@ -240,6 +240,8 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
|
||||
if (err)
|
||||
goto err_post_unpin;
|
||||
|
||||
intel_engine_pm_might_get(ce->engine);
|
||||
|
||||
if (unlikely(intel_context_is_closed(ce))) {
|
||||
err = -ENOENT;
|
||||
goto err_unlock;
|
||||
@ -362,8 +364,8 @@ static int __intel_context_active(struct i915_active *active)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sw_fence_dummy_notify(struct i915_sw_fence *sf,
|
||||
enum i915_sw_fence_notify state)
|
||||
static int __i915_sw_fence_call
|
||||
sw_fence_dummy_notify(struct i915_sw_fence *sf, enum i915_sw_fence_notify state)
|
||||
{
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
@ -399,6 +401,10 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
|
||||
ce->guc_id.id = GUC_INVALID_LRC_ID;
|
||||
INIT_LIST_HEAD(&ce->guc_id.link);
|
||||
|
||||
INIT_LIST_HEAD(&ce->destroyed_link);
|
||||
|
||||
INIT_LIST_HEAD(&ce->parallel.child_list);
|
||||
|
||||
/*
|
||||
* Initialize fence to be complete as this is expected to be complete
|
||||
* unless there is a pending schedule disable outstanding.
|
||||
@ -413,10 +419,17 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
|
||||
|
||||
void intel_context_fini(struct intel_context *ce)
|
||||
{
|
||||
struct intel_context *child, *next;
|
||||
|
||||
if (ce->timeline)
|
||||
intel_timeline_put(ce->timeline);
|
||||
i915_vm_put(ce->vm);
|
||||
|
||||
/* Need to put the creation ref for the children */
|
||||
if (intel_context_is_parent(ce))
|
||||
for_each_child_safe(ce, child, next)
|
||||
intel_context_put(child);
|
||||
|
||||
mutex_destroy(&ce->pin_mutex);
|
||||
i915_active_fini(&ce->active);
|
||||
i915_sw_fence_fini(&ce->guc_state.blocked);
|
||||
@ -515,24 +528,53 @@ retry:
|
||||
|
||||
struct i915_request *intel_context_find_active_request(struct intel_context *ce)
|
||||
{
|
||||
struct intel_context *parent = intel_context_to_parent(ce);
|
||||
struct i915_request *rq, *active = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));
|
||||
|
||||
spin_lock_irqsave(&ce->guc_state.lock, flags);
|
||||
list_for_each_entry_reverse(rq, &ce->guc_state.requests,
|
||||
/*
|
||||
* We search the parent list to find an active request on the submitted
|
||||
* context. The parent list contains the requests for all the contexts
|
||||
* in the relationship so we have to do a compare of each request's
|
||||
* context.
|
||||
*/
|
||||
spin_lock_irqsave(&parent->guc_state.lock, flags);
|
||||
list_for_each_entry_reverse(rq, &parent->guc_state.requests,
|
||||
sched.link) {
|
||||
if (rq->context != ce)
|
||||
continue;
|
||||
if (i915_request_completed(rq))
|
||||
break;
|
||||
|
||||
active = rq;
|
||||
}
|
||||
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
|
||||
spin_unlock_irqrestore(&parent->guc_state.lock, flags);
|
||||
|
||||
return active;
|
||||
}
|
||||
|
||||
void intel_context_bind_parent_child(struct intel_context *parent,
|
||||
struct intel_context *child)
|
||||
{
|
||||
/*
|
||||
* Callers responsibility to validate that this function is used
|
||||
* correctly but we use GEM_BUG_ON here ensure that they do.
|
||||
*/
|
||||
GEM_BUG_ON(!intel_engine_uses_guc(parent->engine));
|
||||
GEM_BUG_ON(intel_context_is_pinned(parent));
|
||||
GEM_BUG_ON(intel_context_is_child(parent));
|
||||
GEM_BUG_ON(intel_context_is_pinned(child));
|
||||
GEM_BUG_ON(intel_context_is_child(child));
|
||||
GEM_BUG_ON(intel_context_is_parent(child));
|
||||
|
||||
parent->parallel.child_index = parent->parallel.number_children++;
|
||||
list_add_tail(&child->parallel.child_link,
|
||||
&parent->parallel.child_list);
|
||||
child->parallel.parent = parent;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
||||
#include "selftest_context.c"
|
||||
#endif
|
||||
|
@ -44,6 +44,54 @@ void intel_context_free(struct intel_context *ce);
|
||||
int intel_context_reconfigure_sseu(struct intel_context *ce,
|
||||
const struct intel_sseu sseu);
|
||||
|
||||
#define PARENT_SCRATCH_SIZE PAGE_SIZE
|
||||
|
||||
static inline bool intel_context_is_child(struct intel_context *ce)
|
||||
{
|
||||
return !!ce->parallel.parent;
|
||||
}
|
||||
|
||||
static inline bool intel_context_is_parent(struct intel_context *ce)
|
||||
{
|
||||
return !!ce->parallel.number_children;
|
||||
}
|
||||
|
||||
static inline bool intel_context_is_pinned(struct intel_context *ce);
|
||||
|
||||
static inline struct intel_context *
|
||||
intel_context_to_parent(struct intel_context *ce)
|
||||
{
|
||||
if (intel_context_is_child(ce)) {
|
||||
/*
|
||||
* The parent holds ref count to the child so it is always safe
|
||||
* for the parent to access the child, but the child has a
|
||||
* pointer to the parent without a ref. To ensure this is safe
|
||||
* the child should only access the parent pointer while the
|
||||
* parent is pinned.
|
||||
*/
|
||||
GEM_BUG_ON(!intel_context_is_pinned(ce->parallel.parent));
|
||||
|
||||
return ce->parallel.parent;
|
||||
} else {
|
||||
return ce;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool intel_context_is_parallel(struct intel_context *ce)
|
||||
{
|
||||
return intel_context_is_child(ce) || intel_context_is_parent(ce);
|
||||
}
|
||||
|
||||
void intel_context_bind_parent_child(struct intel_context *parent,
|
||||
struct intel_context *child);
|
||||
|
||||
#define for_each_child(parent, ce)\
|
||||
list_for_each_entry(ce, &(parent)->parallel.child_list,\
|
||||
parallel.child_link)
|
||||
#define for_each_child_safe(parent, ce, cn)\
|
||||
list_for_each_entry_safe(ce, cn, &(parent)->parallel.child_list,\
|
||||
parallel.child_link)
|
||||
|
||||
/**
|
||||
* intel_context_lock_pinned - Stablises the 'pinned' status of the HW context
|
||||
* @ce - the context
|
||||
@ -193,7 +241,13 @@ intel_context_timeline_lock(struct intel_context *ce)
|
||||
struct intel_timeline *tl = ce->timeline;
|
||||
int err;
|
||||
|
||||
err = mutex_lock_interruptible(&tl->mutex);
|
||||
if (intel_context_is_parent(ce))
|
||||
err = mutex_lock_interruptible_nested(&tl->mutex, 0);
|
||||
else if (intel_context_is_child(ce))
|
||||
err = mutex_lock_interruptible_nested(&tl->mutex,
|
||||
ce->parallel.child_index + 1);
|
||||
else
|
||||
err = mutex_lock_interruptible(&tl->mutex);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
|
@ -55,9 +55,13 @@ struct intel_context_ops {
|
||||
void (*reset)(struct intel_context *ce);
|
||||
void (*destroy)(struct kref *kref);
|
||||
|
||||
/* virtual engine/context interface */
|
||||
/* virtual/parallel engine/context interface */
|
||||
struct intel_context *(*create_virtual)(struct intel_engine_cs **engine,
|
||||
unsigned int count);
|
||||
unsigned int count,
|
||||
unsigned long flags);
|
||||
struct intel_context *(*create_parallel)(struct intel_engine_cs **engines,
|
||||
unsigned int num_siblings,
|
||||
unsigned int width);
|
||||
struct intel_engine_cs *(*get_sibling)(struct intel_engine_cs *engine,
|
||||
unsigned int sibling);
|
||||
};
|
||||
@ -113,6 +117,7 @@ struct intel_context {
|
||||
#define CONTEXT_NOPREEMPT 8
|
||||
#define CONTEXT_LRCA_DIRTY 9
|
||||
#define CONTEXT_GUC_INIT 10
|
||||
#define CONTEXT_PERMA_PIN 11
|
||||
|
||||
struct {
|
||||
u64 timeout_us;
|
||||
@ -197,22 +202,80 @@ struct intel_context {
|
||||
struct {
|
||||
/**
|
||||
* @id: handle which is used to uniquely identify this context
|
||||
* with the GuC, protected by guc->contexts_lock
|
||||
* with the GuC, protected by guc->submission_state.lock
|
||||
*/
|
||||
u16 id;
|
||||
/**
|
||||
* @ref: the number of references to the guc_id, when
|
||||
* transitioning in and out of zero protected by
|
||||
* guc->contexts_lock
|
||||
* guc->submission_state.lock
|
||||
*/
|
||||
atomic_t ref;
|
||||
/**
|
||||
* @link: in guc->guc_id_list when the guc_id has no refs but is
|
||||
* still valid, protected by guc->contexts_lock
|
||||
* still valid, protected by guc->submission_state.lock
|
||||
*/
|
||||
struct list_head link;
|
||||
} guc_id;
|
||||
|
||||
/**
|
||||
* @destroyed_link: link in guc->submission_state.destroyed_contexts, in
|
||||
* list when context is pending to be destroyed (deregistered with the
|
||||
* GuC), protected by guc->submission_state.lock
|
||||
*/
|
||||
struct list_head destroyed_link;
|
||||
|
||||
/** @parallel: sub-structure for parallel submission members */
|
||||
struct {
|
||||
union {
|
||||
/**
|
||||
* @child_list: parent's list of children
|
||||
* contexts, no protection as immutable after context
|
||||
* creation
|
||||
*/
|
||||
struct list_head child_list;
|
||||
/**
|
||||
* @child_link: child's link into parent's list of
|
||||
* children
|
||||
*/
|
||||
struct list_head child_link;
|
||||
};
|
||||
/** @parent: pointer to parent if child */
|
||||
struct intel_context *parent;
|
||||
/**
|
||||
* @last_rq: last request submitted on a parallel context, used
|
||||
* to insert submit fences between requests in the parallel
|
||||
* context
|
||||
*/
|
||||
struct i915_request *last_rq;
|
||||
/**
|
||||
* @fence_context: fence context composite fence when doing
|
||||
* parallel submission
|
||||
*/
|
||||
u64 fence_context;
|
||||
/**
|
||||
* @seqno: seqno for composite fence when doing parallel
|
||||
* submission
|
||||
*/
|
||||
u32 seqno;
|
||||
/** @number_children: number of children if parent */
|
||||
u8 number_children;
|
||||
/** @child_index: index into child_list if child */
|
||||
u8 child_index;
|
||||
/** @guc: GuC specific members for parallel submission */
|
||||
struct {
|
||||
/** @wqi_head: head pointer in work queue */
|
||||
u16 wqi_head;
|
||||
/** @wqi_tail: tail pointer in work queue */
|
||||
u16 wqi_tail;
|
||||
/**
|
||||
* @parent_page: page in context state (ce->state) used
|
||||
* by parent for work queue, process descriptor
|
||||
*/
|
||||
u8 parent_page;
|
||||
} guc;
|
||||
} parallel;
|
||||
|
||||
#ifdef CONFIG_DRM_I915_SELFTEST
|
||||
/**
|
||||
* @drop_schedule_enable: Force drop of schedule enable G2H for selftest
|
||||
|
@ -2,6 +2,7 @@
|
||||
#ifndef _INTEL_RINGBUFFER_H_
|
||||
#define _INTEL_RINGBUFFER_H_
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <drm/drm_util.h>
|
||||
|
||||
#include <linux/hashtable.h>
|
||||
@ -281,9 +282,19 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
|
||||
return intel_engine_has_preemption(engine);
|
||||
}
|
||||
|
||||
#define FORCE_VIRTUAL BIT(0)
|
||||
struct intel_context *
|
||||
intel_engine_create_virtual(struct intel_engine_cs **siblings,
|
||||
unsigned int count);
|
||||
unsigned int count, unsigned long flags);
|
||||
|
||||
static inline struct intel_context *
|
||||
intel_engine_create_parallel(struct intel_engine_cs **engines,
|
||||
unsigned int num_engines,
|
||||
unsigned int width)
|
||||
{
|
||||
GEM_BUG_ON(!engines[0]->cops->create_parallel);
|
||||
return engines[0]->cops->create_parallel(engines, num_engines, width);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
|
||||
|
@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
|
||||
GEM_DEBUG_WARN_ON(iir);
|
||||
}
|
||||
|
||||
static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
|
||||
static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
|
||||
u8 logical_instance)
|
||||
{
|
||||
const struct engine_info *info = &intel_engines[id];
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
@ -335,6 +336,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
|
||||
|
||||
engine->class = info->class;
|
||||
engine->instance = info->instance;
|
||||
engine->logical_mask = BIT(logical_instance);
|
||||
__sprint_engine_name(engine);
|
||||
|
||||
engine->props.heartbeat_interval_ms =
|
||||
@ -588,6 +590,37 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
|
||||
return info->engine_mask;
|
||||
}
|
||||
|
||||
static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
|
||||
u8 class, const u8 *map, u8 num_instances)
|
||||
{
|
||||
int i, j;
|
||||
u8 current_logical_id = 0;
|
||||
|
||||
for (j = 0; j < num_instances; ++j) {
|
||||
for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
|
||||
if (!HAS_ENGINE(gt, i) ||
|
||||
intel_engines[i].class != class)
|
||||
continue;
|
||||
|
||||
if (intel_engines[i].instance == map[j]) {
|
||||
logical_ids[intel_engines[i].instance] =
|
||||
current_logical_id++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
|
||||
{
|
||||
int i;
|
||||
u8 map[MAX_ENGINE_INSTANCE + 1];
|
||||
|
||||
for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
|
||||
map[i] = i;
|
||||
populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
|
||||
}
|
||||
|
||||
/**
|
||||
* intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
|
||||
* @gt: pointer to struct intel_gt
|
||||
@ -599,7 +632,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
const unsigned int engine_mask = init_engine_mask(gt);
|
||||
unsigned int mask = 0;
|
||||
unsigned int i;
|
||||
unsigned int i, class;
|
||||
u8 logical_ids[MAX_ENGINE_INSTANCE + 1];
|
||||
int err;
|
||||
|
||||
drm_WARN_ON(&i915->drm, engine_mask == 0);
|
||||
@ -609,15 +643,23 @@ int intel_engines_init_mmio(struct intel_gt *gt)
|
||||
if (i915_inject_probe_failure(i915))
|
||||
return -ENODEV;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
|
||||
if (!HAS_ENGINE(gt, i))
|
||||
continue;
|
||||
for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
|
||||
setup_logical_ids(gt, logical_ids, class);
|
||||
|
||||
err = intel_engine_setup(gt, i);
|
||||
if (err)
|
||||
goto cleanup;
|
||||
for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
|
||||
u8 instance = intel_engines[i].instance;
|
||||
|
||||
mask |= BIT(i);
|
||||
if (intel_engines[i].class != class ||
|
||||
!HAS_ENGINE(gt, i))
|
||||
continue;
|
||||
|
||||
err = intel_engine_setup(gt, i,
|
||||
logical_ids[instance]);
|
||||
if (err)
|
||||
goto cleanup;
|
||||
|
||||
mask |= BIT(i);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1911,16 +1953,16 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
|
||||
|
||||
struct intel_context *
|
||||
intel_engine_create_virtual(struct intel_engine_cs **siblings,
|
||||
unsigned int count)
|
||||
unsigned int count, unsigned long flags)
|
||||
{
|
||||
if (count == 0)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (count == 1)
|
||||
if (count == 1 && !(flags & FORCE_VIRTUAL))
|
||||
return intel_context_create(siblings[0]);
|
||||
|
||||
GEM_BUG_ON(!siblings[0]->cops->create_virtual);
|
||||
return siblings[0]->cops->create_virtual(siblings, count);
|
||||
return siblings[0]->cops->create_virtual(siblings, count, flags);
|
||||
}
|
||||
|
||||
struct i915_request *
|
||||
|
@ -162,6 +162,19 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
|
||||
unsigned long flags;
|
||||
bool result = true;
|
||||
|
||||
/*
|
||||
* This is execlist specific behaviour intended to ensure the GPU is
|
||||
* idle by switching to a known 'safe' context. With GuC submission, the
|
||||
* same idle guarantee is achieved by other means (disabling
|
||||
* scheduling). Further, switching to a 'safe' context has no effect
|
||||
* with GuC submission as the scheduler can just switch back again.
|
||||
*
|
||||
* FIXME: Move this backend scheduler specific behaviour into the
|
||||
* scheduler backend.
|
||||
*/
|
||||
if (intel_engine_uses_guc(engine))
|
||||
return true;
|
||||
|
||||
/* GPU is pointing to the void, as good as in the kernel context. */
|
||||
if (intel_gt_is_wedged(engine->gt))
|
||||
return true;
|
||||
|
@ -6,9 +6,11 @@
|
||||
#ifndef INTEL_ENGINE_PM_H
|
||||
#define INTEL_ENGINE_PM_H
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "i915_request.h"
|
||||
#include "intel_engine_types.h"
|
||||
#include "intel_wakeref.h"
|
||||
#include "intel_gt_pm.h"
|
||||
|
||||
static inline bool
|
||||
intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
|
||||
@ -16,6 +18,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
|
||||
return intel_wakeref_is_active(&engine->wakeref);
|
||||
}
|
||||
|
||||
static inline void __intel_engine_pm_get(struct intel_engine_cs *engine)
|
||||
{
|
||||
__intel_wakeref_get(&engine->wakeref);
|
||||
}
|
||||
|
||||
static inline void intel_engine_pm_get(struct intel_engine_cs *engine)
|
||||
{
|
||||
intel_wakeref_get(&engine->wakeref);
|
||||
@ -26,6 +33,21 @@ static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine)
|
||||
return intel_wakeref_get_if_active(&engine->wakeref);
|
||||
}
|
||||
|
||||
static inline void intel_engine_pm_might_get(struct intel_engine_cs *engine)
|
||||
{
|
||||
if (!intel_engine_is_virtual(engine)) {
|
||||
intel_wakeref_might_get(&engine->wakeref);
|
||||
} else {
|
||||
struct intel_gt *gt = engine->gt;
|
||||
struct intel_engine_cs *tengine;
|
||||
intel_engine_mask_t tmp, mask = engine->mask;
|
||||
|
||||
for_each_engine_masked(tengine, gt, mask, tmp)
|
||||
intel_wakeref_might_get(&tengine->wakeref);
|
||||
}
|
||||
intel_gt_pm_might_get(engine->gt);
|
||||
}
|
||||
|
||||
static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
|
||||
{
|
||||
intel_wakeref_put(&engine->wakeref);
|
||||
@ -47,6 +69,21 @@ static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
|
||||
intel_wakeref_unlock_wait(&engine->wakeref);
|
||||
}
|
||||
|
||||
static inline void intel_engine_pm_might_put(struct intel_engine_cs *engine)
|
||||
{
|
||||
if (!intel_engine_is_virtual(engine)) {
|
||||
intel_wakeref_might_put(&engine->wakeref);
|
||||
} else {
|
||||
struct intel_gt *gt = engine->gt;
|
||||
struct intel_engine_cs *tengine;
|
||||
intel_engine_mask_t tmp, mask = engine->mask;
|
||||
|
||||
for_each_engine_masked(tengine, gt, mask, tmp)
|
||||
intel_wakeref_might_put(&tengine->wakeref);
|
||||
}
|
||||
intel_gt_pm_might_put(engine->gt);
|
||||
}
|
||||
|
||||
static inline struct i915_request *
|
||||
intel_engine_create_kernel_request(struct intel_engine_cs *engine)
|
||||
{
|
||||
|
@ -269,6 +269,13 @@ struct intel_engine_cs {
|
||||
unsigned int guc_id;
|
||||
|
||||
intel_engine_mask_t mask;
|
||||
/**
|
||||
* @logical_mask: logical mask of engine, reported to user space via
|
||||
* query IOCTL and used to communicate with the GuC in logical space.
|
||||
* The logical instance of a physical engine can change based on product
|
||||
* and fusing.
|
||||
*/
|
||||
intel_engine_mask_t logical_mask;
|
||||
|
||||
u8 class;
|
||||
u8 instance;
|
||||
|
@ -201,7 +201,8 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
|
||||
}
|
||||
|
||||
static struct intel_context *
|
||||
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count);
|
||||
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
|
||||
unsigned long flags);
|
||||
|
||||
static struct i915_request *
|
||||
__active_request(const struct intel_timeline * const tl,
|
||||
@ -3784,7 +3785,8 @@ unlock:
|
||||
}
|
||||
|
||||
static struct intel_context *
|
||||
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
|
||||
execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct virtual_engine *ve;
|
||||
unsigned int n;
|
||||
@ -3877,6 +3879,7 @@ execlists_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
|
||||
|
||||
ve->siblings[ve->num_siblings++] = sibling;
|
||||
ve->base.mask |= sibling->mask;
|
||||
ve->base.logical_mask |= sibling->logical_mask;
|
||||
|
||||
/*
|
||||
* All physical engines must be compatible for their emission
|
||||
|
@ -13,6 +13,59 @@
|
||||
#include "pxp/intel_pxp_debugfs.h"
|
||||
#include "uc/intel_uc_debugfs.h"
|
||||
|
||||
int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val)
|
||||
{
|
||||
int ret = intel_gt_terminally_wedged(gt);
|
||||
|
||||
switch (ret) {
|
||||
case -EIO:
|
||||
*val = 1;
|
||||
return 0;
|
||||
case 0:
|
||||
*val = 0;
|
||||
return 0;
|
||||
default:
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val)
|
||||
{
|
||||
/* Flush any previous reset before applying for a new one */
|
||||
wait_event(gt->reset.queue,
|
||||
!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
|
||||
|
||||
intel_gt_handle_error(gt, val, I915_ERROR_CAPTURE,
|
||||
"Manually reset engine mask to %llx", val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* keep the interface clean where the first parameter
|
||||
* is a 'struct intel_gt *' instead of 'void *'
|
||||
*/
|
||||
static int __intel_gt_debugfs_reset_show(void *data, u64 *val)
|
||||
{
|
||||
return intel_gt_debugfs_reset_show(data, val);
|
||||
}
|
||||
|
||||
static int __intel_gt_debugfs_reset_store(void *data, u64 val)
|
||||
{
|
||||
return intel_gt_debugfs_reset_store(data, val);
|
||||
}
|
||||
|
||||
DEFINE_SIMPLE_ATTRIBUTE(reset_fops, __intel_gt_debugfs_reset_show,
|
||||
__intel_gt_debugfs_reset_store, "%llu\n");
|
||||
|
||||
static void gt_debugfs_register(struct intel_gt *gt, struct dentry *root)
|
||||
{
|
||||
static const struct intel_gt_debugfs_file files[] = {
|
||||
{ "reset", &reset_fops, NULL },
|
||||
};
|
||||
|
||||
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
|
||||
}
|
||||
|
||||
void intel_gt_debugfs_register(struct intel_gt *gt)
|
||||
{
|
||||
struct dentry *root;
|
||||
@ -24,6 +77,8 @@ void intel_gt_debugfs_register(struct intel_gt *gt)
|
||||
if (IS_ERR(root))
|
||||
return;
|
||||
|
||||
gt_debugfs_register(gt, root);
|
||||
|
||||
intel_gt_engines_debugfs_register(gt, root);
|
||||
intel_gt_pm_debugfs_register(gt, root);
|
||||
intel_sseu_debugfs_register(gt, root);
|
||||
|
@ -35,4 +35,8 @@ void intel_gt_debugfs_register_files(struct dentry *root,
|
||||
const struct intel_gt_debugfs_file *files,
|
||||
unsigned long count, void *data);
|
||||
|
||||
/* functions that need to be accessed by the upper level non-gt interfaces */
|
||||
int intel_gt_debugfs_reset_show(struct intel_gt *gt, u64 *val);
|
||||
int intel_gt_debugfs_reset_store(struct intel_gt *gt, u64 val);
|
||||
|
||||
#endif /* INTEL_GT_DEBUGFS_H */
|
||||
|
@ -31,6 +31,11 @@ static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
|
||||
return intel_wakeref_get_if_active(>->wakeref);
|
||||
}
|
||||
|
||||
static inline void intel_gt_pm_might_get(struct intel_gt *gt)
|
||||
{
|
||||
intel_wakeref_might_get(&gt->wakeref);
|
||||
}
|
||||
|
||||
static inline void intel_gt_pm_put(struct intel_gt *gt)
|
||||
{
|
||||
intel_wakeref_put(&gt->wakeref);
|
||||
@ -41,6 +46,15 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
|
||||
intel_wakeref_put_async(&gt->wakeref);
|
||||
}
|
||||
|
||||
static inline void intel_gt_pm_might_put(struct intel_gt *gt)
|
||||
{
|
||||
intel_wakeref_might_put(&gt->wakeref);
|
||||
}
|
||||
|
||||
#define with_intel_gt_pm(gt, tmp) \
|
||||
for (tmp = 1, intel_gt_pm_get(gt); tmp; \
|
||||
intel_gt_pm_put(gt), tmp = 0)
|
||||
|
||||
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
|
||||
{
|
||||
return intel_wakeref_wait_for_idle(&gt->wakeref);
|
||||
|
@ -20,6 +20,46 @@
#include "intel_uncore.h"
#include "vlv_sideband.h"

int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
{
	atomic_inc(&gt->user_wakeref);
	intel_gt_pm_get(gt);
	if (GRAPHICS_VER(gt->i915) >= 6)
		intel_uncore_forcewake_user_get(gt->uncore);

	return 0;
}

int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
{
	if (GRAPHICS_VER(gt->i915) >= 6)
		intel_uncore_forcewake_user_put(gt->uncore);
	intel_gt_pm_put(gt);
	atomic_dec(&gt->user_wakeref);

	return 0;
}

static int forcewake_user_open(struct inode *inode, struct file *file)
{
	struct intel_gt *gt = inode->i_private;

	return intel_gt_pm_debugfs_forcewake_user_open(gt);
}

static int forcewake_user_release(struct inode *inode, struct file *file)
{
	struct intel_gt *gt = inode->i_private;

	return intel_gt_pm_debugfs_forcewake_user_release(gt);
}

static const struct file_operations forcewake_user_fops = {
	.owner = THIS_MODULE,
	.open = forcewake_user_open,
	.release = forcewake_user_release,
};

static int fw_domains_show(struct seq_file *m, void *data)
{
	struct intel_gt *gt = m->private;

@ -628,6 +668,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
		{ "drpc", &drpc_fops, NULL },
		{ "frequency", &frequency_fops, NULL },
		{ "forcewake", &fw_domains_fops, NULL },
		{ "forcewake_user", &forcewake_user_fops, NULL},
		{ "llc", &llc_fops, llc_eval },
		{ "rps_boost", &rps_boost_fops, rps_eval },
	};
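
Illustrative sketch (not part of the diff above): with the new gt-level "forcewake_user" file, user space holds forcewake for as long as it keeps the file open, mirroring the legacy i915_forcewake_user behaviour. The debugfs path below is an assumption (default debugfs mount, card0, gt directory); adjust for the actual device.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/debug/dri/0/gt/forcewake_user", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* ... poke registers or gather data while forcewake is held ... */
	sleep(1);

	close(fd);	/* release() drops the wakeref and forcewake */
	return 0;
}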
@ -13,4 +13,8 @@ struct drm_printer;

void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root);
void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *m);

/* functions that need to be accessed by the upper level non-gt interfaces */
int intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt);
int intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt);

#endif /* INTEL_GT_PM_DEBUGFS_H */
@ -3,6 +3,7 @@
 * Copyright © 2019 Intel Corporation
 */

#include <asm/tsc.h>
#include <linux/cpufreq.h>

#include "i915_drv.h"
@ -942,6 +942,11 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
		context_size += PAGE_SIZE;
	}

	if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
		ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
		context_size += PARENT_SCRATCH_SIZE;
	}

	obj = i915_gem_object_create_lmem(engine->i915, context_size,
					  I915_BO_ALLOC_PM_VOLATILE);
	if (IS_ERR(obj))
@ -292,7 +292,7 @@ static void xcs_sanitize(struct intel_engine_cs *engine)
	sanitize_hwsp(engine);

	/* And scrub the dirty cachelines for the HWSP */
-	clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
+	drm_clflush_virt_range(engine->status_page.addr, PAGE_SIZE);

	intel_engine_reset_pinned_contexts(engine);
}
@ -64,7 +64,7 @@ intel_timeline_pin_map(struct intel_timeline *timeline)

	timeline->hwsp_map = vaddr;
	timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
-	clflush(vaddr + ofs);
+	drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);

	return 0;
}

@ -225,7 +225,7 @@ void intel_timeline_reset_seqno(const struct intel_timeline *tl)

	memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
	WRITE_ONCE(*hwsp_seqno, tl->seqno);
-	clflush(hwsp_seqno);
+	drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
}

void intel_timeline_enter(struct intel_timeline *tl)
@ -3733,7 +3733,7 @@ static int nop_virtual_engine(struct intel_gt *gt,
	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));

	for (n = 0; n < nctx; n++) {
-		ve[n] = intel_engine_create_virtual(siblings, nsibling);
+		ve[n] = intel_engine_create_virtual(siblings, nsibling, 0);
		if (IS_ERR(ve[n])) {
			err = PTR_ERR(ve[n]);
			nctx = n;

@ -3929,7 +3929,7 @@ static int mask_virtual_engine(struct intel_gt *gt,
	 * restrict it to our desired engine within the virtual engine.
	 */

-	ve = intel_engine_create_virtual(siblings, nsibling);
+	ve = intel_engine_create_virtual(siblings, nsibling, 0);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_close;

@ -4060,7 +4060,7 @@ static int slicein_virtual_engine(struct intel_gt *gt,
		i915_request_add(rq);
	}

-	ce = intel_engine_create_virtual(siblings, nsibling);
+	ce = intel_engine_create_virtual(siblings, nsibling, 0);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out;

@ -4112,7 +4112,7 @@ static int sliceout_virtual_engine(struct intel_gt *gt,

	/* XXX We do not handle oversubscription and fairness with normal rq */
	for (n = 0; n < nsibling; n++) {
-		ce = intel_engine_create_virtual(siblings, nsibling);
+		ce = intel_engine_create_virtual(siblings, nsibling, 0);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;

@ -4214,7 +4214,7 @@ static int preserved_virtual_engine(struct intel_gt *gt,
	if (err)
		goto out_scratch;

-	ve = intel_engine_create_virtual(siblings, nsibling);
+	ve = intel_engine_create_virtual(siblings, nsibling, 0);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_scratch;

@ -4354,7 +4354,7 @@ static int reset_virtual_engine(struct intel_gt *gt,
	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

-	ve = intel_engine_create_virtual(siblings, nsibling);
+	ve = intel_engine_create_virtual(siblings, nsibling, 0);
	if (IS_ERR(ve)) {
		err = PTR_ERR(ve);
		goto out_spin;
@ -142,6 +142,7 @@ enum intel_guc_action {
	INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
	INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
	INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
	INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
	INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
	INTEL_GUC_ACTION_LIMIT
};
@ -756,3 +756,32 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p)
		}
	}
}

void intel_guc_write_barrier(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);

	if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
		/*
		 * Ensure intel_uncore_write_fw can be used rather than
		 * intel_uncore_write.
		 */
		GEM_BUG_ON(guc->send_regs.fw_domains);

		/*
		 * This register is used by the i915 and GuC for MMIO based
		 * communication. Once we are in this code CTBs are the only
		 * method the i915 uses to communicate with the GuC so it is
		 * safe to write to this register (a value of 0 is NOP for MMIO
		 * communication). If we ever start mixing CTBs and MMIOs a new
		 * register will have to be chosen. This function is also used
		 * to enforce ordering of a work queue item write and an update
		 * to the process descriptor. When a work queue is being used,
		 * CTBs are also the only mechanism of communication.
		 */
		intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
	} else {
		/* wmb() sufficient for a barrier if in smem */
		wmb();
	}
}
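
Illustrative sketch (not part of the diff above): the producer pattern this barrier exists for, as described in the comment above and in the ct_write() hunk further down: write the payload into the GuC-visible buffer, issue the barrier, then publish the new tail so the GuC can never see the tail before the data. Everything except intel_guc_write_barrier() is a placeholder name assumed for the sketch.

static void publish_h2g(struct intel_guc *guc, u32 *cmds, u32 *tail_ptr,
			const u32 *msg, u32 len_dw, u32 new_tail)
{
	memcpy(cmds, msg, len_dw * sizeof(u32));	/* 1. write the payload */

	intel_guc_write_barrier(guc);			/* 2. order payload vs tail */

	WRITE_ONCE(*tail_ptr, new_tail);		/* 3. publish the new tail */
}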
@ -46,6 +46,15 @@ struct intel_guc {
	 * submitted until the stalled request is processed.
	 */
	struct i915_request *stalled_request;
+	/**
+	 * @submission_stall_reason: reason why submission is stalled
+	 */
+	enum {
+		STALL_NONE,
+		STALL_REGISTER_CONTEXT,
+		STALL_MOVE_LRC_TAIL,
+		STALL_ADD_REQUEST,
+	} submission_stall_reason;

	/* intel_guc_recv interrupt related state */
	/** @irq_lock: protects GuC irq state */

@ -71,16 +80,41 @@ struct intel_guc {
	} interrupts;

	/**
-	 * @contexts_lock: protects guc_ids, guc_id_list, ce->guc_id.id, and
-	 * ce->guc_id.ref when transitioning in and out of zero
+	 * @submission_state: sub-structure for submission state protected by
+	 * single lock
	 */
-	spinlock_t contexts_lock;
-	/** @guc_ids: used to allocate unique ce->guc_id.id values */
-	struct ida guc_ids;
-	/**
-	 * @guc_id_list: list of intel_context with valid guc_ids but no refs
-	 */
-	struct list_head guc_id_list;
+	struct {
+		/**
+		 * @lock: protects everything in submission_state,
+		 * ce->guc_id.id, and ce->guc_id.ref when transitioning in and
+		 * out of zero
+		 */
+		spinlock_t lock;
+		/**
+		 * @guc_ids: used to allocate new guc_ids, single-lrc
+		 */
+		struct ida guc_ids;
+		/**
+		 * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+		 */
+		unsigned long *guc_ids_bitmap;
+		/**
+		 * @guc_id_list: list of intel_context with valid guc_ids but no
+		 * refs
+		 */
+		struct list_head guc_id_list;
+		/**
+		 * @destroyed_contexts: list of contexts waiting to be destroyed
+		 * (deregistered with the GuC)
+		 */
+		struct list_head destroyed_contexts;
+		/**
+		 * @destroyed_worker: worker to deregister contexts, need as we
+		 * need to take a GT PM reference and can't from destroy
+		 * function as it might be in an atomic context (no sleeping)
+		 */
+		struct work_struct destroyed_worker;
+	} submission_state;

	/**
	 * @submission_supported: tracks whether we support GuC submission on

@ -342,4 +376,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc);

void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);

+void intel_guc_write_barrier(struct intel_guc *guc);
+
#endif
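
Illustrative sketch (not part of the diff above): the split allocator the new submission_state comments describe, with single-lrc contexts drawing one id from the ida and multi-lrc (parent) contexts drawing a contiguous block from the bitmap. The function name, constants and ranges below are assumptions for illustration only, not the driver's actual values.

#define EXAMPLE_MULTI_LRC_IDS	1024	/* ids reserved for multi-lrc (assumed) */
#define EXAMPLE_TOTAL_IDS	65535	/* total guc_id space (assumed) */

static int example_alloc_guc_id(struct intel_guc *guc, unsigned int count)
{
	int id;

	lockdep_assert_held(&guc->submission_state.lock);

	if (count > 1)	/* parent + children: contiguous block from the bitmap */
		id = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
					     EXAMPLE_MULTI_LRC_IDS,
					     order_base_2(count));
	else		/* single lrc: one id from the ida */
		id = ida_alloc_range(&guc->submission_state.guc_ids,
				     EXAMPLE_MULTI_LRC_IDS, EXAMPLE_TOTAL_IDS,
				     GFP_ATOMIC);

	return id;
}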
@ -176,7 +176,7 @@ static void guc_mapping_table_init(struct intel_gt *gt,
	for_each_engine(engine, gt, id) {
		u8 guc_class = engine_class_to_guc_class(engine->class);

-		system_info->mapping_table[guc_class][engine->instance] =
+		system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
			engine->instance;
	}
}
@ -383,28 +383,6 @@ static u32 ct_get_next_fence(struct intel_guc_ct *ct)
	return ++ct->requests.last_fence;
}

-static void write_barrier(struct intel_guc_ct *ct)
-{
-	struct intel_guc *guc = ct_to_guc(ct);
-	struct intel_gt *gt = guc_to_gt(guc);
-
-	if (i915_gem_object_is_lmem(guc->ct.vma->obj)) {
-		GEM_BUG_ON(guc->send_regs.fw_domains);
-		/*
-		 * This register is used by the i915 and GuC for MMIO based
-		 * communication. Once we are in this code CTBs are the only
-		 * method the i915 uses to communicate with the GuC so it is
-		 * safe to write to this register (a value of 0 is NOP for MMIO
-		 * communication). If we ever start mixing CTBs and MMIOs a new
-		 * register will have to be chosen.
-		 */
-		intel_uncore_write_fw(gt->uncore, GEN11_SOFT_SCRATCH(0), 0);
-	} else {
-		/* wmb() sufficient for a barrier if in smem */
-		wmb();
-	}
-}
-
static int ct_write(struct intel_guc_ct *ct,
		    const u32 *action,
		    u32 len /* in dwords */,

@ -474,7 +452,7 @@ static int ct_write(struct intel_guc_ct *ct,
	 * make sure H2G buffer update and LRC tail update (if this triggering a
	 * submission) are visible before updating the descriptor tail
	 */
-	write_barrier(ct);
+	intel_guc_write_barrier(ct_to_guc(ct));

	/* update local copies */
	ctb->tail = tail;
@ -52,27 +52,27 @@

#define GUC_DOORBELL_INVALID		256

-#define GUC_WQ_SIZE			(PAGE_SIZE * 2)
-
-/* Work queue item header definitions */
+/*
+ * Work queue item header definitions
+ *
+ * Work queue is circular buffer used to submit complex (multi-lrc) submissions
+ * to the GuC. A work queue item is an entry in the circular buffer.
+ */
#define WQ_STATUS_ACTIVE		1
#define WQ_STATUS_SUSPENDED		2
#define WQ_STATUS_CMD_ERROR		3
#define WQ_STATUS_ENGINE_ID_NOT_USED	4
#define WQ_STATUS_SUSPENDED_FROM_RESET	5
-#define WQ_TYPE_SHIFT			0
-#define WQ_TYPE_BATCH_BUF		(0x1 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_PSEUDO			(0x2 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_INORDER			(0x3 << WQ_TYPE_SHIFT)
-#define WQ_TYPE_NOOP			(0x4 << WQ_TYPE_SHIFT)
-#define WQ_TARGET_SHIFT			10
-#define WQ_LEN_SHIFT			16
-#define WQ_NO_WCFLUSH_WAIT		(1 << 27)
-#define WQ_PRESENT_WORKLOAD		(1 << 28)
+#define WQ_TYPE_BATCH_BUF		0x1
+#define WQ_TYPE_PSEUDO			0x2
+#define WQ_TYPE_INORDER			0x3
+#define WQ_TYPE_NOOP			0x4
+#define WQ_TYPE_MULTI_LRC		0x5
+#define WQ_TYPE_MASK			GENMASK(7, 0)
+#define WQ_LEN_MASK			GENMASK(26, 16)

-#define WQ_RING_TAIL_SHIFT		20
-#define WQ_RING_TAIL_MAX		0x7FF	/* 2^11 QWords */
-#define WQ_RING_TAIL_MASK		(WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT)
+#define WQ_GUC_ID_MASK			GENMASK(15, 0)
+#define WQ_RING_TAIL_MASK		GENMASK(28, 18)

#define GUC_STAGE_DESC_ATTR_ACTIVE	BIT(0)
#define GUC_STAGE_DESC_ATTR_PENDING_DB	BIT(1)

@ -186,7 +186,7 @@ struct guc_process_desc {
	u32 wq_status;
	u32 engine_presence;
	u32 priority;
-	u32 reserved[30];
+	u32 reserved[36];
} __packed;

#define CONTEXT_REGISTRATION_FLAG_KMD	BIT(0)
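
Illustrative sketch (not part of the diff above): how the new GENMASK-based fields could be packed into a multi-lrc work queue item header with FIELD_PREP(). The masks and type values come from the hunk above; the exact dword layout of a work queue item is an assumption made for the sketch, not taken from the GuC interface documentation.

#include <linux/bitfield.h>

static void example_pack_wq_item(u32 *wqi, u32 len_dw, u32 guc_id, u32 tail)
{
	wqi[0] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		 FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[1] = FIELD_PREP(WQ_GUC_ID_MASK, guc_id) |
		 FIELD_PREP(WQ_RING_TAIL_MASK, tail);
}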
(File diff suppressed because it is too large.)

drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c (new file, 179 lines)
@ -0,0 +1,179 @@
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "selftests/igt_spinner.h"
#include "selftests/igt_reset.h"
#include "selftests/intel_scheduler_helpers.h"
#include "gt/intel_engine_heartbeat.h"
#include "gem/selftests/mock_context.h"

static void logical_sort(struct intel_engine_cs **engines, int num_engines)
{
	struct intel_engine_cs *sorted[MAX_ENGINE_INSTANCE + 1];
	int i, j;

	for (i = 0; i < num_engines; ++i)
		for (j = 0; j < MAX_ENGINE_INSTANCE + 1; ++j) {
			if (engines[j]->logical_mask & BIT(i)) {
				sorted[i] = engines[j];
				break;
			}
		}

	memcpy(*engines, *sorted,
	       sizeof(struct intel_engine_cs *) * num_engines);
}

static struct intel_context *
multi_lrc_create_parent(struct intel_gt *gt, u8 class,
			unsigned long flags)
{
	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int i = 0;

	for_each_engine(engine, gt, id) {
		if (engine->class != class)
			continue;

		siblings[i++] = engine;
	}

	if (i <= 1)
		return ERR_PTR(0);

	logical_sort(siblings, i);

	return intel_engine_create_parallel(siblings, 1, i);
}

static void multi_lrc_context_unpin(struct intel_context *ce)
{
	struct intel_context *child;

	GEM_BUG_ON(!intel_context_is_parent(ce));

	for_each_child(ce, child)
		intel_context_unpin(child);
	intel_context_unpin(ce);
}

static void multi_lrc_context_put(struct intel_context *ce)
{
	GEM_BUG_ON(!intel_context_is_parent(ce));

	/*
	 * Only the parent gets the creation ref put in the uAPI, the parent
	 * itself is responsible for creation ref put on the children.
	 */
	intel_context_put(ce);
}

static struct i915_request *
multi_lrc_nop_request(struct intel_context *ce)
{
	struct intel_context *child;
	struct i915_request *rq, *child_rq;
	int i = 0;

	GEM_BUG_ON(!intel_context_is_parent(ce));

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq))
		return rq;

	i915_request_get(rq);
	i915_request_add(rq);

	for_each_child(ce, child) {
		child_rq = intel_context_create_request(child);
		if (IS_ERR(child_rq))
			goto child_error;

		if (++i == ce->parallel.number_children)
			set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL,
				&child_rq->fence.flags);
		i915_request_add(child_rq);
	}

	return rq;

child_error:
	i915_request_put(rq);

	return ERR_PTR(-ENOMEM);
}

static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class)
{
	struct intel_context *parent;
	struct i915_request *rq;
	int ret;

	parent = multi_lrc_create_parent(gt, class, 0);
	if (IS_ERR(parent)) {
		pr_err("Failed creating contexts: %ld", PTR_ERR(parent));
		return PTR_ERR(parent);
	} else if (!parent) {
		pr_debug("Not enough engines in class: %d", class);
		return 0;
	}

	rq = multi_lrc_nop_request(parent);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		pr_err("Failed creating requests: %d", ret);
		goto out;
	}

	ret = intel_selftest_wait_for_rq(rq);
	if (ret)
		pr_err("Failed waiting on request: %d", ret);

	i915_request_put(rq);

	if (ret >= 0) {
		ret = intel_gt_wait_for_idle(gt, HZ * 5);
		if (ret < 0)
			pr_err("GT failed to idle: %d\n", ret);
	}

out:
	multi_lrc_context_unpin(parent);
	multi_lrc_context_put(parent);
	return ret;
}

static int intel_guc_multi_lrc_basic(void *arg)
{
	struct intel_gt *gt = arg;
	unsigned int class;
	int ret;

	for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) {
		ret = __intel_guc_multi_lrc_basic(gt, class);
		if (ret)
			return ret;
	}

	return 0;
}

int intel_guc_multi_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(intel_guc_multi_lrc_basic),
	};
	struct intel_gt *gt = &i915->gt;

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	return intel_gt_live_subtests(tests, gt);
}
@ -35,6 +35,7 @@
#include "gt/intel_gt.h"
#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_debugfs.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_pm_debugfs.h"
#include "gt/intel_gt_requests.h"

@ -553,36 +554,18 @@ static int i915_wa_registers(struct seq_file *m, void *unused)
	return 0;
}

-static int
-i915_wedged_get(void *data, u64 *val)
+static int i915_wedged_get(void *data, u64 *val)
{
	struct drm_i915_private *i915 = data;
-	int ret = intel_gt_terminally_wedged(&i915->gt);
-
-	switch (ret) {
-	case -EIO:
-		*val = 1;
-		return 0;
-	case 0:
-		*val = 0;
-		return 0;
-	default:
-		return ret;
-	}
+	return intel_gt_debugfs_reset_show(&i915->gt, val);
}

-static int
-i915_wedged_set(void *data, u64 val)
+static int i915_wedged_set(void *data, u64 val)
{
	struct drm_i915_private *i915 = data;

-	/* Flush any previous reset before applying for a new one */
-	wait_event(i915->gt.reset.queue,
-		   !test_bit(I915_RESET_BACKOFF, &i915->gt.reset.flags));
-
-	intel_gt_handle_error(&i915->gt, val, I915_ERROR_CAPTURE,
-			      "Manually set wedged engine mask = %llx", val);
-	return 0;
+	return intel_gt_debugfs_reset_store(&i915->gt, val);
}

DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,

@ -727,27 +710,15 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
static int i915_forcewake_open(struct inode *inode, struct file *file)
{
	struct drm_i915_private *i915 = inode->i_private;
-	struct intel_gt *gt = &i915->gt;
-
-	atomic_inc(&gt->user_wakeref);
-	intel_gt_pm_get(gt);
-	if (GRAPHICS_VER(i915) >= 6)
-		intel_uncore_forcewake_user_get(gt->uncore);
-
-	return 0;
+	return intel_gt_pm_debugfs_forcewake_user_open(&i915->gt);
}

static int i915_forcewake_release(struct inode *inode, struct file *file)
{
	struct drm_i915_private *i915 = inode->i_private;
-	struct intel_gt *gt = &i915->gt;
-
-	if (GRAPHICS_VER(i915) >= 6)
-		intel_uncore_forcewake_user_put(&i915->uncore);
-	intel_gt_pm_put(gt);
-	atomic_dec(&gt->user_wakeref);
-
-	return 0;
+	return intel_gt_pm_debugfs_forcewake_user_release(&i915->gt);
}

static const struct file_operations i915_forcewake_fops = {
@ -124,7 +124,9 @@ query_engine_info(struct drm_i915_private *i915,
	for_each_uabi_engine(engine, i915) {
		info.engine.engine_class = engine->uabi_class;
		info.engine.engine_instance = engine->uabi_instance;
+		info.flags = I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE;
		info.capabilities = engine->uabi_capabilities;
+		info.logical_instance = ilog2(engine->logical_mask);

		if (copy_to_user(info_ptr, &info, sizeof(info)))
			return -EFAULT;
@ -1335,6 +1335,25 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
	return err;
}

static inline bool is_parallel_rq(struct i915_request *rq)
{
	return intel_context_is_parallel(rq->context);
}

static inline struct intel_context *request_to_parent(struct i915_request *rq)
{
	return intel_context_to_parent(rq->context);
}

static bool is_same_parallel_context(struct i915_request *to,
				     struct i915_request *from)
{
	if (is_parallel_rq(to))
		return request_to_parent(to) == request_to_parent(from);

	return false;
}

int
i915_request_await_execution(struct i915_request *rq,
			     struct dma_fence *fence)

@ -1366,11 +1385,14 @@ i915_request_await_execution(struct i915_request *rq,
		 * want to run our callback in all cases.
		 */

-		if (dma_fence_is_i915(fence))
+		if (dma_fence_is_i915(fence)) {
+			if (is_same_parallel_context(rq, to_request(fence)))
+				continue;
			ret = __i915_request_await_execution(rq,
							     to_request(fence));
-		else
+		} else {
			ret = i915_request_await_external(rq, fence);
+		}
		if (ret < 0)
			return ret;
	} while (--nchild);

@ -1471,10 +1493,13 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
						 fence))
			continue;

-		if (dma_fence_is_i915(fence))
+		if (dma_fence_is_i915(fence)) {
+			if (is_same_parallel_context(rq, to_request(fence)))
+				continue;
			ret = i915_request_await_request(rq, to_request(fence));
-		else
+		} else {
			ret = i915_request_await_external(rq, fence);
+		}
		if (ret < 0)
			return ret;
@ -1549,6 +1574,81 @@ i915_request_await_object(struct i915_request *to,
	return ret;
}

static struct i915_request *
__i915_request_ensure_parallel_ordering(struct i915_request *rq,
					struct intel_timeline *timeline)
{
	struct i915_request *prev;

	GEM_BUG_ON(!is_parallel_rq(rq));

	prev = request_to_parent(rq)->parallel.last_rq;
	if (prev) {
		if (!__i915_request_is_complete(prev)) {
			i915_sw_fence_await_sw_fence(&rq->submit,
						     &prev->submit,
						     &rq->submitq);

			if (rq->engine->sched_engine->schedule)
				__i915_sched_node_add_dependency(&rq->sched,
								 &prev->sched,
								 &rq->dep,
								 0);
		}
		i915_request_put(prev);
	}

	request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);

	return to_request(__i915_active_fence_set(&timeline->last_request,
						  &rq->fence));
}

static struct i915_request *
__i915_request_ensure_ordering(struct i915_request *rq,
			       struct intel_timeline *timeline)
{
	struct i915_request *prev;

	GEM_BUG_ON(is_parallel_rq(rq));

	prev = to_request(__i915_active_fence_set(&timeline->last_request,
						  &rq->fence));

	if (prev && !__i915_request_is_complete(prev)) {
		bool uses_guc = intel_engine_uses_guc(rq->engine);
		bool pow2 = is_power_of_2(READ_ONCE(prev->engine)->mask |
					  rq->engine->mask);
		bool same_context = prev->context == rq->context;

		/*
		 * The requests are supposed to be kept in order. However,
		 * we need to be wary in case the timeline->last_request
		 * is used as a barrier for external modification to this
		 * context.
		 */
		GEM_BUG_ON(same_context &&
			   i915_seqno_passed(prev->fence.seqno,
					     rq->fence.seqno));

		if ((same_context && uses_guc) || (!uses_guc && pow2))
			i915_sw_fence_await_sw_fence(&rq->submit,
						     &prev->submit,
						     &rq->submitq);
		else
			__i915_sw_fence_await_dma_fence(&rq->submit,
							&prev->fence,
							&rq->dmaq);
		if (rq->engine->sched_engine->schedule)
			__i915_sched_node_add_dependency(&rq->sched,
							 &prev->sched,
							 &rq->dep,
							 0);
	}

	return prev;
}

static struct i915_request *
__i915_request_add_to_timeline(struct i915_request *rq)
{

@ -1574,38 +1674,21 @@ __i915_request_add_to_timeline(struct i915_request *rq)
	 * complete (to maximise our greedy late load balancing) and this
	 * precludes optimising to use semaphores serialisation of a single
	 * timeline across engines.
	 *
	 * We do not order parallel submission requests on the timeline as each
	 * parallel submission context has its own timeline and the ordering
	 * rules for parallel requests are that they must be submitted in the
	 * order received from the execbuf IOCTL. So rather than using the
	 * timeline we store a pointer to last request submitted in the
	 * relationship in the gem context and insert a submission fence
	 * between that request and request passed into this function or
	 * alternatively we use completion fence if gem context has a single
	 * timeline and this is the first submission of an execbuf IOCTL.
	 */
-	prev = to_request(__i915_active_fence_set(&timeline->last_request,
-						  &rq->fence));
-	if (prev && !__i915_request_is_complete(prev)) {
-		bool uses_guc = intel_engine_uses_guc(rq->engine);
-
-		/*
-		 * The requests are supposed to be kept in order. However,
-		 * we need to be wary in case the timeline->last_request
-		 * is used as a barrier for external modification to this
-		 * context.
-		 */
-		GEM_BUG_ON(prev->context == rq->context &&
-			   i915_seqno_passed(prev->fence.seqno,
-					     rq->fence.seqno));
-
-		if ((!uses_guc &&
-		     is_power_of_2(READ_ONCE(prev->engine)->mask | rq->engine->mask)) ||
-		    (uses_guc && prev->context == rq->context))
-			i915_sw_fence_await_sw_fence(&rq->submit,
-						     &prev->submit,
-						     &rq->submitq);
-		else
-			__i915_sw_fence_await_dma_fence(&rq->submit,
-							&prev->fence,
-							&rq->dmaq);
-		if (rq->engine->sched_engine->schedule)
-			__i915_sched_node_add_dependency(&rq->sched,
-							 &prev->sched,
-							 &rq->dep,
-							 0);
-	}
+	if (likely(!is_parallel_rq(rq)))
+		prev = __i915_request_ensure_ordering(rq, timeline);
+	else
+		prev = __i915_request_ensure_parallel_ordering(rq, timeline);

	/*
	 * Make sure that no request gazumped us - if it was allocated after
@ -139,6 +139,29 @@ enum {
	 * the GPU. Here we track such boost requests on a per-request basis.
	 */
	I915_FENCE_FLAG_BOOST,

	/*
	 * I915_FENCE_FLAG_SUBMIT_PARALLEL - request with a context in a
	 * parent-child relationship (parallel submission, multi-lrc) should
	 * trigger a submission to the GuC rather than just moving the context
	 * tail.
	 */
	I915_FENCE_FLAG_SUBMIT_PARALLEL,

	/*
	 * I915_FENCE_FLAG_SKIP_PARALLEL - request with a context in a
	 * parent-child relationship (parallel submission, multi-lrc) that
	 * hit an error while generating requests in the execbuf IOCTL.
	 * Indicates this request should be skipped as another request in
	 * submission / relationship encountered an error.
	 */
	I915_FENCE_FLAG_SKIP_PARALLEL,

	/*
	 * I915_FENCE_FLAG_COMPOSITE - Indicates fence is part of a composite
	 * fence (dma_fence_array) and i915 generated for parallel submission.
	 */
	I915_FENCE_FLAG_COMPOSITE,
};

/**
@ -1234,9 +1234,10 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
	return i915_active_add_request(&vma->active, rq);
}

-int i915_vma_move_to_active(struct i915_vma *vma,
-			    struct i915_request *rq,
-			    unsigned int flags)
+int _i915_vma_move_to_active(struct i915_vma *vma,
+			     struct i915_request *rq,
+			     struct dma_fence *fence,
+			     unsigned int flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	int err;

@ -1257,9 +1258,11 @@ int i915_vma_move_to_active(struct i915_vma *vma,
			intel_frontbuffer_put(front);
		}

-		dma_resv_add_excl_fence(vma->resv, &rq->fence);
-		obj->write_domain = I915_GEM_DOMAIN_RENDER;
-		obj->read_domains = 0;
+		if (fence) {
+			dma_resv_add_excl_fence(vma->resv, fence);
+			obj->write_domain = I915_GEM_DOMAIN_RENDER;
+			obj->read_domains = 0;
+		}
	} else {
		if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
			err = dma_resv_reserve_shared(vma->resv, 1);

@ -1267,8 +1270,10 @@ int i915_vma_move_to_active(struct i915_vma *vma,
				return err;
		}

-		dma_resv_add_shared_fence(vma->resv, &rq->fence);
-		obj->write_domain = 0;
+		if (fence) {
+			dma_resv_add_shared_fence(vma->resv, fence);
+			obj->write_domain = 0;
+		}
	}

	if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)
@ -57,9 +57,16 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma)

int __must_check __i915_vma_move_to_active(struct i915_vma *vma,
					   struct i915_request *rq);
-int __must_check i915_vma_move_to_active(struct i915_vma *vma,
-					 struct i915_request *rq,
-					 unsigned int flags);
+int __must_check _i915_vma_move_to_active(struct i915_vma *vma,
+					  struct i915_request *rq,
+					  struct dma_fence *fence,
+					  unsigned int flags);
+static inline int __must_check
+i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq,
+			unsigned int flags)
+{
+	return _i915_vma_move_to_active(vma, rq, &rq->fence, flags);
+}

#define __i915_vma_flags(v) ((unsigned long *)&(v)->flags.counter)
@ -123,6 +123,12 @@ enum {
	__INTEL_WAKEREF_PUT_LAST_BIT__
};

static inline void
intel_wakeref_might_get(struct intel_wakeref *wf)
{
	might_lock(&wf->mutex);
}

/**
 * intel_wakeref_put_flags: Release the wakeref
 * @wf: the wakeref

@ -170,6 +176,12 @@ intel_wakeref_put_delay(struct intel_wakeref *wf, unsigned long delay)
			  FIELD_PREP(INTEL_WAKEREF_PUT_DELAY, delay));
}

static inline void
intel_wakeref_might_put(struct intel_wakeref *wf)
{
	might_lock(&wf->mutex);
}

/**
 * intel_wakeref_lock: Lock the wakeref (mutex)
 * @wf: the wakeref
@ -48,5 +48,6 @@ selftest(ring_submission, intel_ring_submission_live_selftests)
selftest(perf, i915_perf_live_selftests)
selftest(slpc, intel_slpc_live_selftests)
selftest(guc, intel_guc_live_selftests)
+selftest(guc_multi_lrc, intel_guc_multi_lrc_live_selftests)
/* Here be dragons: keep last to run last! */
selftest(late_gt_pm, intel_gt_pm_late_selftests)
@ -6,8 +6,6 @@
#include <drm/ttm/ttm_placement.h>
#include <linux/scatterlist.h>

-#include <drm/ttm/ttm_placement.h>
-
#include "gem/i915_gem_region.h"
#include "intel_memory_region.h"
#include "intel_region_ttm.h"
@ -1830,6 +1830,7 @@ struct drm_i915_gem_context_param {
 * Extensions:
 *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
 *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
 *   i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
 */
#define I915_CONTEXT_PARAM_ENGINES	0xa

@ -2104,6 +2105,135 @@ struct i915_context_engines_bond {
	struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__

/**
 * struct i915_context_engines_parallel_submit - Configure engine for
 * parallel submission.
 *
 * Setup a slot in the context engine map to allow multiple BBs to be submitted
 * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
 * in parallel. Multiple hardware contexts are created internally in the i915 to
 * run these BBs. Once a slot is configured for N BBs only N BBs can be
 * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
 * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
 * many BBs there are based on the slot's configuration. The N BBs are the last
 * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
 *
 * The default placement behavior is to create implicit bonds between each
 * context if each context maps to more than 1 physical engine (e.g. context is
 * a virtual engine). Also we only allow contexts of same engine class and these
 * contexts must be in logically contiguous order. Examples of the placement
 * behavior are described below. Lastly, the default is to not allow BBs to be
 * preempted mid-batch. Rather insert coordinated preemption points on all
 * hardware contexts between each set of BBs. Flags could be added in the future
 * to change both of these default behaviors.
 *
 * Returns -EINVAL if hardware context placement configuration is invalid or if
 * the placement configuration isn't supported on the platform / submission
 * interface.
 * Returns -ENODEV if extension isn't supported on the platform / submission
 * interface.
 *
 * .. code-block:: none
 *
 *	Examples syntax:
 *	CS[X] = generic engine of same class, logical instance X
 *	INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
 *
 *	Example 1 pseudo code:
 *	set_engines(INVALID)
 *	set_parallel(engine_index=0, width=2, num_siblings=1,
 *		     engines=CS[0],CS[1])
 *
 *	Results in the following valid placement:
 *	CS[0], CS[1]
 *
 *	Example 2 pseudo code:
 *	set_engines(INVALID)
 *	set_parallel(engine_index=0, width=2, num_siblings=2,
 *		     engines=CS[0],CS[2],CS[1],CS[3])
 *
 *	Results in the following valid placements:
 *	CS[0], CS[1]
 *	CS[2], CS[3]
 *
 *	This can be thought of as two virtual engines, each containing two
 *	engines thereby making a 2D array. However, there are bonds tying the
 *	entries together and placing restrictions on how they can be scheduled.
 *	Specifically, the scheduler can choose only vertical columns from the 2D
 *	array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the
 *	scheduler wants to submit to CS[0], it must also choose CS[1] and vice
 *	versa. Same for CS[2] requires also using CS[3].
 *	VE[0] = CS[0], CS[2]
 *	VE[1] = CS[1], CS[3]
 *
 *	Example 3 pseudo code:
 *	set_engines(INVALID)
 *	set_parallel(engine_index=0, width=2, num_siblings=2,
 *		     engines=CS[0],CS[1],CS[1],CS[3])
 *
 *	Results in the following valid and invalid placements:
 *	CS[0], CS[1]
 *	CS[1], CS[3] - Not logically contiguous, return -EINVAL
 */
struct i915_context_engines_parallel_submit {
	/**
	 * @base: base user extension.
	 */
	struct i915_user_extension base;

	/**
	 * @engine_index: slot for parallel engine
	 */
	__u16 engine_index;

	/**
	 * @width: number of contexts per parallel engine or in other words the
	 * number of batches in each submission
	 */
	__u16 width;

	/**
	 * @num_siblings: number of siblings per context or in other words the
	 * number of possible placements for each submission
	 */
	__u16 num_siblings;

	/**
	 * @mbz16: reserved for future use; must be zero
	 */
	__u16 mbz16;

	/**
	 * @flags: all undefined flags must be zero, currently not defined flags
	 */
	__u64 flags;

	/**
	 * @mbz64: reserved for future use; must be zero
	 */
	__u64 mbz64[3];

	/**
	 * @engines: 2-d array of engine instances to configure parallel engine
	 *
	 * length = width (i) * num_siblings (j)
	 * index = j + i * num_siblings
	 */
	struct i915_engine_class_instance engines[0];

} __packed;

#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \
	struct i915_user_extension base; \
	__u16 engine_index; \
	__u16 width; \
	__u16 num_siblings; \
	__u16 mbz16; \
	__u64 flags; \
	__u64 mbz64[3]; \
	struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
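
Illustrative user-space sketch (not part of the diff above) of Example 1 from the kerneldoc: one parallel slot, width=2, num_siblings=1, siblings CS[0] and CS[1] of the video class. Only the uAPI names defined above and the existing I915_DEFINE_CONTEXT_PARAM_ENGINES macro are used; the engine class/instance values are placeholders and the context-create/setparam plumbing is omitted.

#include <stdint.h>
#include <string.h>
#include <drm/i915_drm.h>

static I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(parallel, 2);
static I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1);

static void setup_parallel_slot(void)
{
	memset(&parallel, 0, sizeof(parallel));
	memset(&engines, 0, sizeof(engines));

	parallel.base.name = I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT;
	parallel.engine_index = 0;		/* which slot in the engine map */
	parallel.width = 2;			/* two BBs per execbuf */
	parallel.num_siblings = 1;		/* one placement per BB */
	parallel.engines[0].engine_class = I915_ENGINE_CLASS_VIDEO;
	parallel.engines[0].engine_instance = 0;	/* CS[0] */
	parallel.engines[1].engine_class = I915_ENGINE_CLASS_VIDEO;
	parallel.engines[1].engine_instance = 1;	/* CS[1] */

	/* Slot 0 starts INVALID; the extension fills it in. */
	engines.engines[0].engine_class = I915_ENGINE_CLASS_INVALID;
	engines.engines[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
	engines.extensions = (uintptr_t)&parallel;

	/* "engines" would then be passed via I915_CONTEXT_PARAM_ENGINES at
	 * context creation; that part is omitted from this sketch. */
}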

/**
 * DOC: Context Engine Map uAPI
 *

@ -2163,6 +2293,7 @@ struct i915_context_param_engines {
	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
	struct i915_engine_class_instance engines[0];
} __attribute__((packed));

@ -2781,14 +2912,20 @@ struct drm_i915_engine_info {

	/** @flags: Engine flags. */
	__u64 flags;
#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE		(1 << 0)

	/** @capabilities: Capabilities of this engine. */
	__u64 capabilities;
#define I915_VIDEO_CLASS_CAPABILITY_HEVC		(1 << 0)
#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC	(1 << 1)

	/** @logical_instance: Logical instance of engine */
	__u16 logical_instance;

	/** @rsvd1: Reserved fields. */
-	__u64 rsvd1[4];
+	__u16 rsvd1[3];

	/** @rsvd2: Reserved fields. */
	__u64 rsvd2[3];
};
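
Illustrative user-space sketch (not part of the diff above): reading the new logical_instance field through the existing DRM_I915_QUERY_ENGINE_INFO query, checking the I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE flag the kernel now sets. The usual two-call pattern (first call sizes the buffer) is assumed and error handling is trimmed for brevity.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <drm/i915_drm.h>
#include <xf86drm.h>

static void print_logical_instances(int fd)
{
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_ENGINE_INFO,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	struct drm_i915_query_engine_info *info;
	unsigned int i;

	drmIoctl(fd, DRM_IOCTL_I915_QUERY, &query);	/* 1st call: sizes item.length */
	info = calloc(1, item.length);
	item.data_ptr = (uintptr_t)info;
	drmIoctl(fd, DRM_IOCTL_I915_QUERY, &query);	/* 2nd call: fills the buffer */

	for (i = 0; i < info->num_engines; i++) {
		const struct drm_i915_engine_info *e = &info->engines[i];

		if (e->flags & I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE)
			printf("class %u instance %u -> logical %u\n",
			       e->engine.engine_class,
			       e->engine.engine_instance,
			       e->logical_instance);
	}
	free(info);
}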

/**