drm/i915: Serialise concurrent calls to i915_gem_set_wedged()
Make i915_gem_set_wedged() and i915_gem_unset_wedged() behaviour more
consistent if called concurrently, and only do the wedging and reporting
once, curtailing any possible race where we start unwedging in the middle
of a wedge.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190114210408.4561-2-chris@chris-wilson.co.uk
commit 18bb2bccb5
parent 204474a6b8
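The patch below serialises the wedge/unwedge paths with a new wedge_mutex: the first caller of i915_gem_set_wedged() performs the teardown and reporting while holding the mutex, later callers see the I915_WEDGED bit already set and return immediately, and i915_gem_unset_wedged() takes the same mutex so recovery cannot begin while a wedge is still in progress. The standalone userspace sketch below illustrates that pattern only; fake_gpu, wedge(), unwedge() and wedge_thread() are invented names, and a pthread mutex plus a plain bool stand in for the driver's struct mutex and the I915_WEDGED flag bit.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_gpu {
        pthread_mutex_t wedge_mutex;    /* serialises wedging/unwedging */
        bool wedged;                    /* stands in for the I915_WEDGED bit */
};

static void wedge(struct fake_gpu *gpu)
{
        pthread_mutex_lock(&gpu->wedge_mutex);
        if (gpu->wedged) {
                /* Someone else already wedged the device; do nothing. */
                pthread_mutex_unlock(&gpu->wedge_mutex);
                return;
        }

        /* Teardown and reporting happen exactly once, under the mutex. */
        printf("wedging: stopping submission and cancelling requests\n");
        gpu->wedged = true;

        pthread_mutex_unlock(&gpu->wedge_mutex);
}

static bool unwedge(struct fake_gpu *gpu)
{
        bool ret = false;

        pthread_mutex_lock(&gpu->wedge_mutex);
        if (!gpu->wedged) {
                /* Nothing to recover from. */
                pthread_mutex_unlock(&gpu->wedge_mutex);
                return true;
        }

        /* Recovery cannot start while a concurrent wedge() still holds the
         * mutex, so it never observes a half-wedged device. */
        printf("unwedging: waiting for cancelled requests, restarting\n");
        gpu->wedged = false;
        ret = true;

        pthread_mutex_unlock(&gpu->wedge_mutex);
        return ret;
}

static void *wedge_thread(void *arg)
{
        wedge(arg);
        return NULL;
}

int main(void)
{
        struct fake_gpu gpu = { .wedged = false };
        pthread_t threads[2];
        int i;

        pthread_mutex_init(&gpu.wedge_mutex, NULL);

        /* Two racing callers: only one of them performs the teardown. */
        for (i = 0; i < 2; i++)
                pthread_create(&threads[i], NULL, wedge_thread, &gpu);
        for (i = 0; i < 2; i++)
                pthread_join(threads[i], NULL);

        return unwedge(&gpu) ? 0 : 1;
}

Build with cc -pthread: whichever thread wins the race prints the wedging message once, the loser returns early, mirroring the early test_bit()/return added at the top of i915_gem_set_wedged().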
@@ -3187,10 +3187,15 @@ static void nop_submit_request(struct i915_request *request)
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 {
+        struct i915_gpu_error *error = &i915->gpu_error;
         struct intel_engine_cs *engine;
         enum intel_engine_id id;

-        GEM_TRACE("start\n");
-
+        mutex_lock(&error->wedge_mutex);
+        if (test_bit(I915_WEDGED, &error->flags)) {
+                mutex_unlock(&error->wedge_mutex);
+                return;
+        }
+
         if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) {
                 struct drm_printer p = drm_debug_printer(__func__);

@@ -3199,8 +3204,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
                         intel_engine_dump(engine, &p, "%s\n", engine->name);
         }

-        if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags))
-                goto out;
+        GEM_TRACE("start\n");

         /*
          * First, stop submission to hw, but do not yet complete requests by
@@ -3236,23 +3240,31 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
                 intel_engine_wakeup(engine);
         }

-out:
-        GEM_TRACE("end\n");
+        smp_mb__before_atomic();
+        set_bit(I915_WEDGED, &error->flags);
+
+        GEM_TRACE("end\n");
+        mutex_unlock(&error->wedge_mutex);

-        wake_up_all(&i915->gpu_error.reset_queue);
+        wake_up_all(&error->reset_queue);
 }

 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
+        struct i915_gpu_error *error = &i915->gpu_error;
         struct i915_timeline *tl;
+        bool ret = false;

         lockdep_assert_held(&i915->drm.struct_mutex);
-        if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+        if (!test_bit(I915_WEDGED, &error->flags))
                 return true;

         if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
                 return false;

+        mutex_lock(&error->wedge_mutex);
+
         GEM_TRACE("start\n");

         /*
@@ -3286,7 +3298,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
                  */
                 if (dma_fence_default_wait(&rq->fence, true,
                                            MAX_SCHEDULE_TIMEOUT) < 0)
-                        return false;
+                        goto unlock;
         }
         i915_retire_requests(i915);
         GEM_BUG_ON(i915->gt.active_requests);
@@ -3309,8 +3321,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)

         smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
         clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+        ret = true;
+unlock:
+        mutex_unlock(&i915->gpu_error.wedge_mutex);

-        return true;
+        return ret;
 }

 static void
@@ -5706,6 +5721,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
                           i915_gem_idle_work_handler);
         init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
         init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
+        mutex_init(&dev_priv->gpu_error.wedge_mutex);

         atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
@@ -271,8 +271,8 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF      0
 #define I915_RESET_HANDOFF      1
 #define I915_RESET_MODESET      2
-#define I915_RESET_ENGINE       3
 #define I915_WEDGED             (BITS_PER_LONG - 1)
+#define I915_RESET_ENGINE       (I915_WEDGED - I915_NUM_ENGINES)

         /** Number of times an engine has been reset */
         u32 reset_engine_count[I915_NUM_ENGINES];
@@ -283,6 +283,8 @@ struct i915_gpu_error {
         /** Reason for the current *global* reset */
         const char *reason;

+        struct mutex wedge_mutex; /* serialises wedging/unwedging */
+
         /**
          * Waitqueue to signal when a hang is detected. Used to for waiters
          * to release the struct_mutex for the reset to procede.
@@ -188,6 +188,7 @@ struct drm_i915_private *mock_gem_device(void)

         init_waitqueue_head(&i915->gpu_error.wait_queue);
         init_waitqueue_head(&i915->gpu_error.reset_queue);
+        mutex_init(&i915->gpu_error.wedge_mutex);

         i915->wq = alloc_ordered_workqueue("mock", 0);
         if (!i915->wq)