drm/i915: Multi-BB execbuf

Allow multiple batch buffers to be submitted in a single execbuf IOCTL
after a context has been configured with the 'set_parallel' extension.
The number of batches is implicit, based on the context's configuration.

This is implemented with a series of loops. First a loop is used to find
all the batches, a loop to pin all the HW contexts, a loop to create all
the requests, a loop to submit (emit BB start, etc...) all the requests,
a loop to tie the requests to the VMAs they touch, and finally a loop to
commit the requests to the backend.

A composite fence is also created for the generated requests to return
to the user and to stick in dma resv slots.

No behavior from the existing IOCTL should be changed aside from when
throttling because the ring for a context is full. In this situation,
i915 will now wait while holding the object locks. This change was done
because the code is much simpler to wait while holding the locks and we
believe there isn't a huge benefit of dropping these locks. If this
proves false we can restructure the code to drop the locks during the
wait.

IGT: https://patchwork.freedesktop.org/patch/447008/?series=93071&rev=1
media UMD: https://github.com/intel/media-driver/pull/1252

v2:
 (Matthew Brost)
  - Return proper error value if i915_request_create fails
v3:
 (John Harrison)
  - Add comment explaining create / add order loops + locking
  - Update commit message explaining difference in IOCTL behavior
  - Line wrap some comments
  - eb_add_request returns void
  - Return -EINVAL rather than triggering BUG_ON if cmd parser used
 (Checkpatch)
  - Check eb->batch_len[*current_batch]
v4:
 (CI)
  - Set batch len if passed via execbuf args
  - Call __i915_request_skip after __i915_request_commit
 (Kernel test robot)
  - Initialize rq to NULL in eb_pin_timeline
v5:
 (John Harrison)
  - Fix typo in comments near bb order loops

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211014172005.27155-21-matthew.brost@intel.com
This commit is contained in:
Matthew Brost 2021-10-14 10:20:00 -07:00 committed by John Harrison
parent 5851387a42
commit 544460c338
7 changed files with 595 additions and 251 deletions

File diff suppressed because it is too large Load Diff

View File

@ -241,6 +241,12 @@ intel_context_timeline_lock(struct intel_context *ce)
struct intel_timeline *tl = ce->timeline; struct intel_timeline *tl = ce->timeline;
int err; int err;
if (intel_context_is_parent(ce))
err = mutex_lock_interruptible_nested(&tl->mutex, 0);
else if (intel_context_is_child(ce))
err = mutex_lock_interruptible_nested(&tl->mutex,
ce->parallel.child_index + 1);
else
err = mutex_lock_interruptible(&tl->mutex); err = mutex_lock_interruptible(&tl->mutex);
if (err) if (err)
return ERR_PTR(err); return ERR_PTR(err);

View File

@ -248,6 +248,16 @@ struct intel_context {
* context * context
*/ */
struct i915_request *last_rq; struct i915_request *last_rq;
/**
* @fence_context: fence context for composite fence when doing
* parallel submission
*/
u64 fence_context;
/**
* @seqno: seqno for composite fence when doing parallel
* submission
*/
u32 seqno;
/** @number_children: number of children if parent */ /** @number_children: number of children if parent */
u8 number_children; u8 number_children;
/** @child_index: index into child_list if child */ /** @child_index: index into child_list if child */

View File

@ -3093,6 +3093,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
} }
} }
parent->parallel.fence_context = dma_fence_context_alloc(1);
parent->engine->emit_bb_start = parent->engine->emit_bb_start =
emit_bb_start_parent_no_preempt_mid_batch; emit_bb_start_parent_no_preempt_mid_batch;
parent->engine->emit_fini_breadcrumb = parent->engine->emit_fini_breadcrumb =

View File

@ -147,6 +147,15 @@ enum {
* tail. * tail.
*/ */
I915_FENCE_FLAG_SUBMIT_PARALLEL, I915_FENCE_FLAG_SUBMIT_PARALLEL,
/*
* I915_FENCE_FLAG_SKIP_PARALLEL - request with a context in a
* parent-child relationship (parallel submission, multi-lrc) that
* hit an error while generating requests in the execbuf IOCTL.
* Indicates this request should be skipped as another request in
* submission / relationship encountered an error.
*/
I915_FENCE_FLAG_SKIP_PARALLEL,
}; };
/** /**

View File

@ -1234,8 +1234,9 @@ int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
return i915_active_add_request(&vma->active, rq); return i915_active_add_request(&vma->active, rq);
} }
int i915_vma_move_to_active(struct i915_vma *vma, int _i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq, struct i915_request *rq,
struct dma_fence *fence,
unsigned int flags) unsigned int flags)
{ {
struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_object *obj = vma->obj;
@ -1257,9 +1258,11 @@ int i915_vma_move_to_active(struct i915_vma *vma,
intel_frontbuffer_put(front); intel_frontbuffer_put(front);
} }
dma_resv_add_excl_fence(vma->resv, &rq->fence); if (fence) {
dma_resv_add_excl_fence(vma->resv, fence);
obj->write_domain = I915_GEM_DOMAIN_RENDER; obj->write_domain = I915_GEM_DOMAIN_RENDER;
obj->read_domains = 0; obj->read_domains = 0;
}
} else { } else {
if (!(flags & __EXEC_OBJECT_NO_RESERVE)) { if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
err = dma_resv_reserve_shared(vma->resv, 1); err = dma_resv_reserve_shared(vma->resv, 1);
@ -1267,9 +1270,11 @@ int i915_vma_move_to_active(struct i915_vma *vma,
return err; return err;
} }
dma_resv_add_shared_fence(vma->resv, &rq->fence); if (fence) {
dma_resv_add_shared_fence(vma->resv, fence);
obj->write_domain = 0; obj->write_domain = 0;
} }
}
if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence) if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)
i915_active_add_request(&vma->fence->active, rq); i915_active_add_request(&vma->fence->active, rq);

View File

@ -57,9 +57,16 @@ static inline bool i915_vma_is_active(const struct i915_vma *vma)
int __must_check __i915_vma_move_to_active(struct i915_vma *vma, int __must_check __i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq); struct i915_request *rq);
int __must_check i915_vma_move_to_active(struct i915_vma *vma, int __must_check _i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq, struct i915_request *rq,
struct dma_fence *fence,
unsigned int flags); unsigned int flags);
/*
 * Convenience wrapper around _i915_vma_move_to_active() that passes the
 * request's own fence (&rq->fence) as the fence argument, so callers can
 * keep using a three-argument (vma, rq, flags) call. The return value of
 * _i915_vma_move_to_active() is passed through unchanged.
 */
static inline int __must_check
i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq,
unsigned int flags)
{
return _i915_vma_move_to_active(vma, rq, &rq->fence, flags);
}
#define __i915_vma_flags(v) ((unsigned long *)&(v)->flags.counter) #define __i915_vma_flags(v) ((unsigned long *)&(v)->flags.counter)