drm/i915/gt: Introduce barrier pulses along engines
To flush idle barriers, and even inflight requests, we want to send a
preemptive 'pulse' along an engine. We use a no-op request along the
pinned kernel_context at high priority so that it should run or else
kick off the stuck requests. We can use this to ensure idle barriers
are immediately flushed, as part of a context cancellation mechanism,
or as part of a heartbeat mechanism to detect and reset a stuck GPU.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191021174339.5389-1-chris@chris-wilson.co.uk
commit b5e8e954eb (parent 928da10c0c)
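As a quick orientation before the diff, here is a minimal usage sketch, not part of the patch itself: a hypothetical caller (the helper name flush_all_idle_barriers() is invented for illustration) that walks the engines of a GT and drives the two new entry points. The pm get/put bracketing, the for_each_engine() iteration and the -ENODEV handling mirror the selftests added below; include paths are indicative only.

#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"

/* Hypothetical helper, for illustration only. */
static int flush_all_idle_barriers(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);

		/* Emit a kernel-context request that runs the idle barriers */
		err = intel_engine_flush_barriers(engine);

		/*
		 * Optionally also send a preempting pulse; engines without
		 * preemption report -ENODEV, which we treat as nothing to do.
		 */
		if (!err) {
			err = intel_engine_pulse(engine);
			if (err == -ENODEV)
				err = 0;
		}

		intel_engine_pm_put(engine);
		if (err)
			return err;
	}

	return 0;
}
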
drivers/gpu/drm/i915/Makefile
@@ -78,8 +78,9 @@ gt-y += \
 	gt/intel_breadcrumbs.o \
 	gt/intel_context.o \
 	gt/intel_engine_cs.o \
-	gt/intel_engine_pool.o \
+	gt/intel_engine_heartbeat.o \
 	gt/intel_engine_pm.o \
+	gt/intel_engine_pool.o \
 	gt/intel_engine_user.o \
 	gt/intel_gt.o \
 	gt/intel_gt_irq.o \

new file: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c (77 lines)
@@ -0,0 +1,77 @@
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_request.h"

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine.h"
#include "intel_gt.h"

static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
{
	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
	i915_request_add_active_barriers(rq);
}

int intel_engine_pulse(struct intel_engine_cs *engine)
{
	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	int err = 0;

	if (!intel_engine_has_preemption(engine))
		return -ENODEV;

	if (!intel_engine_pm_get_if_awake(engine))
		return 0;

	if (mutex_lock_interruptible(&ce->timeline->mutex))
		goto out_rpm;

	intel_context_enter(ce);
	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
	intel_context_exit(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unlock;
	}

	rq->flags |= I915_REQUEST_SENTINEL;
	idle_pulse(engine, rq);

	__i915_request_commit(rq);
	__i915_request_queue(rq, &attr);

out_unlock:
	mutex_unlock(&ce->timeline->mutex);
out_rpm:
	intel_engine_pm_put(engine);
	return err;
}

int intel_engine_flush_barriers(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	if (llist_empty(&engine->barrier_tasks))
		return 0;

	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	idle_pulse(engine, rq);
	i915_request_add(rq);

	return 0;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_heartbeat.c"
#endif

new file: drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h (15 lines)
@@ -0,0 +1,15 @@
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef INTEL_ENGINE_HEARTBEAT_H
#define INTEL_ENGINE_HEARTBEAT_H

struct intel_engine_cs;

int intel_engine_pulse(struct intel_engine_cs *engine);
int intel_engine_flush_barriers(struct intel_engine_cs *engine);

#endif /* INTEL_ENGINE_HEARTBEAT_H */

drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -111,7 +111,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	i915_request_add_active_barriers(rq);

 	/* Install ourselves as a preemption barrier */
-	rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
+	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
 	__i915_request_commit(rq);

 	/* Release our exclusive hold on the engine */

new file: drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c (159 lines)
@@ -0,0 +1,159 @@
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include "i915_drv.h"

#include "intel_gt_requests.h"
#include "i915_selftest.h"

struct pulse {
	struct i915_active active;
	struct kref kref;
};

static int pulse_active(struct i915_active *active)
{
	kref_get(&container_of(active, struct pulse, active)->kref);
	return 0;
}

static void pulse_free(struct kref *kref)
{
	kfree(container_of(kref, struct pulse, kref));
}

static void pulse_put(struct pulse *p)
{
	kref_put(&p->kref, pulse_free);
}

static void pulse_retire(struct i915_active *active)
{
	pulse_put(container_of(active, struct pulse, active));
}

static struct pulse *pulse_create(void)
{
	struct pulse *p;

	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return p;

	kref_init(&p->kref);
	i915_active_init(&p->active, pulse_active, pulse_retire);

	return p;
}

static int __live_idle_pulse(struct intel_engine_cs *engine,
			     int (*fn)(struct intel_engine_cs *cs))
{
	struct pulse *p;
	int err;

	p = pulse_create();
	if (!p)
		return -ENOMEM;

	err = i915_active_acquire(&p->active);
	if (err)
		goto out;

	err = i915_active_acquire_preallocate_barrier(&p->active, engine);
	if (err) {
		i915_active_release(&p->active);
		goto out;
	}

	i915_active_acquire_barrier(&p->active);
	i915_active_release(&p->active);

	GEM_BUG_ON(i915_active_is_idle(&p->active));

	err = fn(engine);
	if (err)
		goto out;

	if (intel_gt_retire_requests_timeout(engine->gt, HZ / 5)) {
		err = -ETIME;
		goto out;
	}

	if (!i915_active_is_idle(&p->active)) {
		pr_err("%s: heartbeat pulse did not flush idle tasks\n",
		       engine->name);
		err = -EINVAL;
		goto out;
	}

out:
	pulse_put(p);
	return err;
}

static int live_idle_flush(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that we can flush the idle barriers */

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);
		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

static int live_idle_pulse(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that heartbeat pulses flush the idle barriers */

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);
		err = __live_idle_pulse(engine, intel_engine_pulse);
		intel_engine_pm_put(engine);
		if (err && err != -ENODEV)
			break;

		err = 0;
	}

	return err;
}

int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_idle_flush),
		SUBTEST(live_idle_pulse),
	};
	int saved_hangcheck;
	int err;

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	saved_hangcheck = i915_modparams.enable_hangcheck;
	i915_modparams.enable_hangcheck = INT_MAX;

	err = intel_gt_live_subtests(tests, &i915->gt);

	i915_modparams.enable_hangcheck = saved_hangcheck;
	return err;
}

drivers/gpu/drm/i915/i915_active.c
@@ -595,6 +595,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 	struct llist_node *pos, *next;
 	int err;

+	GEM_BUG_ON(i915_active_is_idle(ref));
 	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));

 	/*

drivers/gpu/drm/i915/i915_priolist_types.h
@@ -39,6 +39,7 @@ enum {
 	 * active request.
 	 */
 #define I915_PRIORITY_UNPREEMPTABLE INT_MAX
+#define I915_PRIORITY_BARRIER INT_MAX

 #define __NO_PREEMPTION (I915_PRIORITY_WAIT)

drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -17,6 +17,7 @@ selftest(gt_timelines, intel_timeline_live_selftests)
 selftest(gt_contexts, intel_context_live_selftests)
 selftest(gt_lrc, intel_lrc_live_selftests)
 selftest(gt_pm, intel_gt_pm_live_selftests)
+selftest(gt_heartbeat, intel_heartbeat_live_selftests)
 selftest(requests, i915_request_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)