drm/i915: include hangcheck action and score in error_state
Score and action reveals what all the rings were doing and why hang was declared. Add idle state so that we can distinguish between waiting and idle ring. v2: - add idle as a hangcheck action - consensed hangcheck status to single line (Chris) - mark active explicitly when we are making progress (Chris) Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:
parent
be62acb4cc
commit
da66146425
@ -328,6 +328,8 @@ struct drm_i915_error_state {
|
||||
u32 *active_bo_count, *pinned_bo_count;
|
||||
struct intel_overlay_error_state *overlay;
|
||||
struct intel_display_error_state *display;
|
||||
int hangcheck_score[I915_NUM_RINGS];
|
||||
enum intel_ring_hangcheck_action hangcheck_action[I915_NUM_RINGS];
|
||||
};
|
||||
|
||||
struct intel_crtc_config;
|
||||
|
@ -213,6 +213,24 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
|
||||
}
|
||||
}
|
||||
|
||||
static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a)
|
||||
{
|
||||
switch (a) {
|
||||
case HANGCHECK_IDLE:
|
||||
return "idle";
|
||||
case HANGCHECK_WAIT:
|
||||
return "wait";
|
||||
case HANGCHECK_ACTIVE:
|
||||
return "active";
|
||||
case HANGCHECK_KICK:
|
||||
return "kick";
|
||||
case HANGCHECK_HUNG:
|
||||
return "hung";
|
||||
}
|
||||
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
|
||||
struct drm_device *dev,
|
||||
struct drm_i915_error_state *error,
|
||||
@ -253,6 +271,9 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
|
||||
err_printf(m, " waiting: %s\n", yesno(error->waiting[ring]));
|
||||
err_printf(m, " ring->head: 0x%08x\n", error->cpu_ring_head[ring]);
|
||||
err_printf(m, " ring->tail: 0x%08x\n", error->cpu_ring_tail[ring]);
|
||||
err_printf(m, " hangcheck: %s [%d]\n",
|
||||
hangcheck_action_to_str(error->hangcheck_action[ring]),
|
||||
error->hangcheck_score[ring]);
|
||||
}
|
||||
|
||||
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
|
||||
@ -718,6 +739,9 @@ static void i915_record_ring_state(struct drm_device *dev,
|
||||
|
||||
error->cpu_ring_head[ring->id] = ring->head;
|
||||
error->cpu_ring_tail[ring->id] = ring->tail;
|
||||
|
||||
error->hangcheck_score[ring->id] = ring->hangcheck.score;
|
||||
error->hangcheck_action[ring->id] = ring->hangcheck.action;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1975,6 +1975,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
|
||||
|
||||
if (ring->hangcheck.seqno == seqno) {
|
||||
if (ring_idle(ring, seqno)) {
|
||||
ring->hangcheck.action = HANGCHECK_IDLE;
|
||||
|
||||
if (waitqueue_active(&ring->irq_queue)) {
|
||||
/* Issue a wake-up to catch stuck h/w. */
|
||||
DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
|
||||
@ -2003,6 +2005,7 @@ static void i915_hangcheck_elapsed(unsigned long data)
|
||||
acthd);
|
||||
|
||||
switch (ring->hangcheck.action) {
|
||||
case HANGCHECK_IDLE:
|
||||
case HANGCHECK_WAIT:
|
||||
break;
|
||||
case HANGCHECK_ACTIVE:
|
||||
@ -2018,6 +2021,8 @@ static void i915_hangcheck_elapsed(unsigned long data)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ring->hangcheck.action = HANGCHECK_ACTIVE;
|
||||
|
||||
/* Gradually reduce the count so that we catch DoS
|
||||
* attempts across multiple batches.
|
||||
*/
|
||||
|
@ -34,6 +34,7 @@ struct intel_hw_status_page {
|
||||
#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)
|
||||
|
||||
enum intel_ring_hangcheck_action {
|
||||
HANGCHECK_IDLE = 0,
|
||||
HANGCHECK_WAIT,
|
||||
HANGCHECK_ACTIVE,
|
||||
HANGCHECK_KICK,
|
||||
|
Loading…
Reference in New Issue
Block a user