diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index ee241b7eaa3b..f0ce70861e93 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1356,7 +1356,9 @@ static int load_pd_dir(struct i915_request *rq, return rq->engine->emit_flush(rq, EMIT_FLUSH); } -static inline int mi_set_context(struct i915_request *rq, u32 flags) +static inline int mi_set_context(struct i915_request *rq, + struct intel_context *ce, + u32 flags) { struct drm_i915_private *i915 = rq->i915; struct intel_engine_cs *engine = rq->engine; @@ -1431,7 +1433,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->context->state) | flags; + *cs++ = i915_ggtt_offset(ce->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1546,13 +1548,56 @@ static int switch_mm(struct i915_request *rq, struct i915_address_space *vm) return rq->engine->emit_flush(rq, EMIT_INVALIDATE); } +static int clear_residuals(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + int ret; + + ret = switch_mm(rq, vm_alias(engine->kernel_context->vm)); + if (ret) + return ret; + + if (engine->kernel_context->state) { + ret = mi_set_context(rq, + engine->kernel_context, + MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT); + if (ret) + return ret; + } + + ret = engine->emit_bb_start(rq, + engine->wa_ctx.vma->node.start, 0, + 0); + if (ret) + return ret; + + ret = engine->emit_flush(rq, EMIT_FLUSH); + if (ret) + return ret; + + /* Always invalidate before the next switch_mm() */ + return engine->emit_flush(rq, EMIT_INVALIDATE); +} + static int switch_context(struct i915_request *rq) { + struct intel_engine_cs *engine = rq->engine; struct intel_context *ce = rq->context; + void **residuals = NULL; int ret; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); + if (engine->wa_ctx.vma && ce != engine->kernel_context) { + if (engine->wa_ctx.vma->private != ce) { + ret = clear_residuals(rq); + if (ret) + return ret; + + residuals = &engine->wa_ctx.vma->private; + } + } + ret = switch_mm(rq, vm_alias(ce->vm)); if (ret) return ret; @@ -1560,7 +1605,7 @@ static int switch_context(struct i915_request *rq) if (ce->state) { u32 flags; - GEM_BUG_ON(rq->engine->id != RCS0); + GEM_BUG_ON(engine->id != RCS0); /* For resource streamer on HSW+ and power context elsewhere */ BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN); @@ -1572,7 +1617,7 @@ static int switch_context(struct i915_request *rq) else flags |= MI_RESTORE_INHIBIT; - ret = mi_set_context(rq, flags); + ret = mi_set_context(rq, ce, flags); if (ret) return ret; } @@ -1581,6 +1626,20 @@ static int switch_context(struct i915_request *rq) if (ret) return ret; + /* + * Now past the point of no return, this request _will_ be emitted. + * + * Or at least this preamble will be emitted, the request may be + * interrupted prior to submitting the user payload. If so, we + * still submit the "empty" request in order to preserve global + * state tracking such as this, our tracking of the current + * dirty context. + */ + if (residuals) { + intel_context_put(*residuals); + *residuals = intel_context_get(ce); + } + return 0; } @@ -1765,6 +1824,11 @@ static void ring_release(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); + if (engine->wa_ctx.vma) { + intel_context_put(engine->wa_ctx.vma->private); + i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); + } + intel_ring_unpin(engine->legacy.ring); intel_ring_put(engine->legacy.ring); @@ -1912,6 +1976,60 @@ static void setup_vecs(struct intel_engine_cs *engine) engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb; } +static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine, + struct i915_vma * const vma) +{ + return 0; +} + +static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int size; + int err; + + size = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */); + if (size <= 0) + return size; + + size = ALIGN(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(engine->i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, engine->gt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vma->private = intel_context_create(engine); /* dummy residuals */ + if (IS_ERR(vma->private)) { + err = PTR_ERR(vma->private); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (err) + goto err_private; + + err = gen7_ctx_switch_bb_setup(engine, vma); + if (err) + goto err_unpin; + + engine->wa_ctx.vma = vma; + return 0; + +err_unpin: + i915_vma_unpin(vma); +err_private: + intel_context_put(vma->private); +err_obj: + i915_gem_object_put(obj); + return err; +} + int intel_ring_submission_setup(struct intel_engine_cs *engine) { struct intel_timeline *timeline; @@ -1965,11 +2083,19 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); + if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) { + err = gen7_ctx_switch_bb_init(engine); + if (err) + goto err_ring_unpin; + } + /* Finally, take ownership and responsibility for cleanup! */ engine->release = ring_release; return 0; +err_ring_unpin: + intel_ring_unpin(ring); err_ring: intel_ring_put(ring); err_timeline_unpin: @@ -1980,3 +2106,7 @@ err: intel_engine_cleanup_common(engine); return err; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_ring_submission.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c new file mode 100644 index 000000000000..fcfddcd2a63d --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "intel_engine_pm.h" +#include "selftests/igt_flush_test.h" + +static struct i915_vma *create_wally(struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cs; + int err; + + obj = i915_gem_object_create_internal(engine->i915, 4096); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, engine->gt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (err) { + i915_gem_object_put(obj); + return ERR_PTR(err); + } + + cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cs)) { + i915_gem_object_put(obj); + return ERR_CAST(cs); + } + + if (INTEL_GEN(engine->i915) >= 6) { + *cs++ = MI_STORE_DWORD_IMM_GEN4; + *cs++ = 0; + } else if (INTEL_GEN(engine->i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + } + *cs++ = vma->node.start + 4000; + *cs++ = STACK_MAGIC; + + *cs++ = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + vma->private = intel_context_create(engine); /* dummy residuals */ + if (IS_ERR(vma->private)) { + vma = ERR_CAST(vma->private); + i915_gem_object_put(obj); + } + + return vma; +} + +static int context_sync(struct intel_context *ce) +{ + struct i915_request *rq; + int err = 0; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static int new_context_sync(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + err = context_sync(ce); + intel_context_put(ce); + + return err; +} + +static int mixed_contexts_sync(struct intel_engine_cs *engine, u32 *result) +{ + int pass; + int err; + + for (pass = 0; pass < 2; pass++) { + WRITE_ONCE(*result, 0); + err = context_sync(engine->kernel_context); + if (err || READ_ONCE(*result)) { + if (!err) { + pr_err("pass[%d] wa_bb emitted for the kernel context\n", + pass); + err = -EINVAL; + } + return err; + } + + WRITE_ONCE(*result, 0); + err = new_context_sync(engine); + if (READ_ONCE(*result) != STACK_MAGIC) { + if (!err) { + pr_err("pass[%d] wa_bb *NOT* emitted after the kernel context\n", + pass); + err = -EINVAL; + } + return err; + } + + WRITE_ONCE(*result, 0); + err = new_context_sync(engine); + if (READ_ONCE(*result) != STACK_MAGIC) { + if (!err) { + pr_err("pass[%d] wa_bb *NOT* emitted for the user context switch\n", + pass); + err = -EINVAL; + } + return err; + } + } + + return 0; +} + +static int double_context_sync_00(struct intel_engine_cs *engine, u32 *result) +{ + struct intel_context *ce; + int err, i; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + for (i = 0; i < 2; i++) { + WRITE_ONCE(*result, 0); + err = context_sync(ce); + if (err) + break; + } + intel_context_put(ce); + if (err) + return err; + + if (READ_ONCE(*result)) { + pr_err("wa_bb emitted between the same user context\n"); + return -EINVAL; + } + + return 0; +} + +static int kernel_context_sync_00(struct intel_engine_cs *engine, u32 *result) +{ + struct intel_context *ce; + int err, i; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + for (i = 0; i < 2; i++) { + WRITE_ONCE(*result, 0); + err = context_sync(ce); + if (err) + break; + + err = context_sync(engine->kernel_context); + if (err) + break; + } + intel_context_put(ce); + if (err) + return err; + + if (READ_ONCE(*result)) { + pr_err("wa_bb emitted between the same user context [with intervening kernel]\n"); + return -EINVAL; + } + + return 0; +} + +static int __live_ctx_switch_wa(struct intel_engine_cs *engine) +{ + struct i915_vma *bb; + u32 *result; + int err; + + bb = create_wally(engine); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + result = i915_gem_object_pin_map(bb->obj, I915_MAP_WC); + if (IS_ERR(result)) { + intel_context_put(bb->private); + i915_vma_unpin_and_release(&bb, 0); + return PTR_ERR(result); + } + result += 1000; + + engine->wa_ctx.vma = bb; + + err = mixed_contexts_sync(engine, result); + if (err) + goto out; + + err = double_context_sync_00(engine, result); + if (err) + goto out; + + err = kernel_context_sync_00(engine, result); + if (err) + goto out; + +out: + intel_context_put(engine->wa_ctx.vma->private); + i915_vma_unpin_and_release(&engine->wa_ctx.vma, I915_VMA_RELEASE_MAP); + return err; +} + +static int live_ctx_switch_wa(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Exercise the inter-context wa batch. + * + * Between each user context we run a wa batch, and since it may + * have implications for user visible state, we have to check that + * we do actually execute it. + * + * The trick we use is to replace the normal wa batch with a custom + * one that writes to a marker within it, and we can then look for + * that marker to confirm if the batch was run when we expect it, + * and equally important it was wasn't run when we don't! + */ + + for_each_engine(engine, gt, id) { + struct i915_vma *saved_wa; + int err; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (IS_GEN_RANGE(gt->i915, 4, 5)) + continue; /* MI_STORE_DWORD is privileged! */ + + saved_wa = fetch_and_zero(&engine->wa_ctx.vma); + + intel_engine_pm_get(engine); + err = __live_ctx_switch_wa(engine); + intel_engine_pm_put(engine); + if (igt_flush_test(gt->i915)) + err = -EIO; + + engine->wa_ctx.vma = saved_wa; + if (err) + return err; + } + + return 0; +} + +int intel_ring_submission_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_ctx_switch_wa), + }; + + if (HAS_EXECLISTS(i915)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 34138c7bdd15..0a953bfc0585 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -43,6 +43,7 @@ selftest(reset, intel_reset_live_selftests) selftest(memory_region, intel_memory_region_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) selftest(execlists, intel_execlists_live_selftests) +selftest(ring_submission, intel_ring_submission_live_selftests) selftest(perf, i915_perf_live_selftests) /* Here be dragons: keep last to run last! */ selftest(late_gt_pm, intel_gt_pm_late_selftests)