drm/i915: Record more information about the hanging contexts
Include extra information such as the user_handle and hw_id so that userspace can identify which of their contexts hung, useful if they are performing self-diagnostics. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170129092433.10483-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
This commit is contained in:
parent
0102ba1fd8
commit
4fa6053efd
@ -969,6 +969,16 @@ struct drm_i915_error_state {
|
|||||||
u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
|
u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
|
||||||
struct intel_instdone instdone;
|
struct intel_instdone instdone;
|
||||||
|
|
||||||
|
struct drm_i915_error_context {
|
||||||
|
char comm[TASK_COMM_LEN];
|
||||||
|
pid_t pid;
|
||||||
|
u32 handle;
|
||||||
|
u32 hw_id;
|
||||||
|
int ban_score;
|
||||||
|
int active;
|
||||||
|
int guilty;
|
||||||
|
} context;
|
||||||
|
|
||||||
struct drm_i915_error_object {
|
struct drm_i915_error_object {
|
||||||
u64 gtt_offset;
|
u64 gtt_offset;
|
||||||
u64 gtt_size;
|
u64 gtt_size;
|
||||||
@ -1002,10 +1012,6 @@ struct drm_i915_error_state {
|
|||||||
u32 pp_dir_base;
|
u32 pp_dir_base;
|
||||||
};
|
};
|
||||||
} vm_info;
|
} vm_info;
|
||||||
|
|
||||||
pid_t pid;
|
|
||||||
char comm[TASK_COMM_LEN];
|
|
||||||
int context_bans;
|
|
||||||
} engine[I915_NUM_ENGINES];
|
} engine[I915_NUM_ENGINES];
|
||||||
|
|
||||||
struct drm_i915_error_buffer {
|
struct drm_i915_error_buffer {
|
||||||
|
@ -384,6 +384,15 @@ static void error_print_request(struct drm_i915_error_state_buf *m,
|
|||||||
erq->head, erq->tail);
|
erq->head, erq->tail);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void error_print_context(struct drm_i915_error_state_buf *m,
|
||||||
|
const char *header,
|
||||||
|
struct drm_i915_error_context *ctx)
|
||||||
|
{
|
||||||
|
err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n",
|
||||||
|
header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
|
||||||
|
ctx->ban_score, ctx->guilty, ctx->active);
|
||||||
|
}
|
||||||
|
|
||||||
static void error_print_engine(struct drm_i915_error_state_buf *m,
|
static void error_print_engine(struct drm_i915_error_state_buf *m,
|
||||||
struct drm_i915_error_engine *ee)
|
struct drm_i915_error_engine *ee)
|
||||||
{
|
{
|
||||||
@ -457,6 +466,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
|
|||||||
|
|
||||||
error_print_request(m, " ELSP[0]: ", &ee->execlist[0]);
|
error_print_request(m, " ELSP[0]: ", &ee->execlist[0]);
|
||||||
error_print_request(m, " ELSP[1]: ", &ee->execlist[1]);
|
error_print_request(m, " ELSP[1]: ", &ee->execlist[1]);
|
||||||
|
error_print_context(m, " Active context: ", &ee->context);
|
||||||
}
|
}
|
||||||
|
|
||||||
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
|
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
|
||||||
@ -562,12 +572,12 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
|
|||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
|
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
|
||||||
if (error->engine[i].hangcheck_stalled &&
|
if (error->engine[i].hangcheck_stalled &&
|
||||||
error->engine[i].pid != -1) {
|
error->engine[i].context.pid) {
|
||||||
err_printf(m, "Active process (on ring %s): %s [%d], context bans %d\n",
|
err_printf(m, "Active process (on ring %s): %s [%d], score %d\n",
|
||||||
engine_str(i),
|
engine_str(i),
|
||||||
error->engine[i].comm,
|
error->engine[i].context.comm,
|
||||||
error->engine[i].pid,
|
error->engine[i].context.pid,
|
||||||
error->engine[i].context_bans);
|
error->engine[i].context.ban_score);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
err_printf(m, "Reset count: %u\n", error->reset_count);
|
err_printf(m, "Reset count: %u\n", error->reset_count);
|
||||||
@ -658,11 +668,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
|
|||||||
obj = ee->batchbuffer;
|
obj = ee->batchbuffer;
|
||||||
if (obj) {
|
if (obj) {
|
||||||
err_puts(m, dev_priv->engine[i]->name);
|
err_puts(m, dev_priv->engine[i]->name);
|
||||||
if (ee->pid != -1)
|
if (ee->context.pid)
|
||||||
err_printf(m, " (submitted by %s [%d], bans %d)",
|
err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)",
|
||||||
ee->comm,
|
ee->context.comm,
|
||||||
ee->pid,
|
ee->context.pid,
|
||||||
ee->context_bans);
|
ee->context.handle,
|
||||||
|
ee->context.hw_id,
|
||||||
|
ee->context.ban_score);
|
||||||
err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
|
err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
|
||||||
upper_32_bits(obj->gtt_offset),
|
upper_32_bits(obj->gtt_offset),
|
||||||
lower_32_bits(obj->gtt_offset));
|
lower_32_bits(obj->gtt_offset));
|
||||||
@ -1267,6 +1279,28 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine,
|
|||||||
&ee->execlist[n]);
|
&ee->execlist[n]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void record_context(struct drm_i915_error_context *e,
|
||||||
|
struct i915_gem_context *ctx)
|
||||||
|
{
|
||||||
|
if (ctx->pid) {
|
||||||
|
struct task_struct *task;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
task = pid_task(ctx->pid, PIDTYPE_PID);
|
||||||
|
if (task) {
|
||||||
|
strcpy(e->comm, task->comm);
|
||||||
|
e->pid = task->pid;
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
e->handle = ctx->user_handle;
|
||||||
|
e->hw_id = ctx->hw_id;
|
||||||
|
e->ban_score = ctx->ban_score;
|
||||||
|
e->guilty = ctx->guilty_count;
|
||||||
|
e->active = ctx->active_count;
|
||||||
|
}
|
||||||
|
|
||||||
static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
|
static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
|
||||||
struct drm_i915_error_state *error)
|
struct drm_i915_error_state *error)
|
||||||
{
|
{
|
||||||
@ -1281,7 +1315,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
|
|||||||
struct drm_i915_error_engine *ee = &error->engine[i];
|
struct drm_i915_error_engine *ee = &error->engine[i];
|
||||||
struct drm_i915_gem_request *request;
|
struct drm_i915_gem_request *request;
|
||||||
|
|
||||||
ee->pid = -1;
|
|
||||||
ee->engine_id = -1;
|
ee->engine_id = -1;
|
||||||
|
|
||||||
if (!engine)
|
if (!engine)
|
||||||
@ -1296,11 +1329,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
|
|||||||
request = i915_gem_find_active_request(engine);
|
request = i915_gem_find_active_request(engine);
|
||||||
if (request) {
|
if (request) {
|
||||||
struct intel_ring *ring;
|
struct intel_ring *ring;
|
||||||
struct pid *pid;
|
|
||||||
|
|
||||||
ee->vm = request->ctx->ppgtt ?
|
ee->vm = request->ctx->ppgtt ?
|
||||||
&request->ctx->ppgtt->base : &ggtt->base;
|
&request->ctx->ppgtt->base : &ggtt->base;
|
||||||
|
|
||||||
|
record_context(&ee->context, request->ctx);
|
||||||
|
|
||||||
/* We need to copy these to an anonymous buffer
|
/* We need to copy these to an anonymous buffer
|
||||||
* as the simplest method to avoid being overwritten
|
* as the simplest method to avoid being overwritten
|
||||||
* by userspace.
|
* by userspace.
|
||||||
@ -1318,19 +1352,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
|
|||||||
i915_error_object_create(dev_priv,
|
i915_error_object_create(dev_priv,
|
||||||
request->ctx->engine[i].state);
|
request->ctx->engine[i].state);
|
||||||
|
|
||||||
pid = request->ctx->pid;
|
|
||||||
if (pid) {
|
|
||||||
struct task_struct *task;
|
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
task = pid_task(pid, PIDTYPE_PID);
|
|
||||||
if (task) {
|
|
||||||
strcpy(ee->comm, task->comm);
|
|
||||||
ee->pid = task->pid;
|
|
||||||
}
|
|
||||||
rcu_read_unlock();
|
|
||||||
}
|
|
||||||
|
|
||||||
error->simulated |=
|
error->simulated |=
|
||||||
i915_gem_context_no_error_capture(request->ctx);
|
i915_gem_context_no_error_capture(request->ctx);
|
||||||
|
|
||||||
@ -1534,12 +1555,12 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
|
|||||||
"GPU HANG: ecode %d:%d:0x%08x",
|
"GPU HANG: ecode %d:%d:0x%08x",
|
||||||
INTEL_GEN(dev_priv), engine_id, ecode);
|
INTEL_GEN(dev_priv), engine_id, ecode);
|
||||||
|
|
||||||
if (engine_id != -1 && error->engine[engine_id].pid != -1)
|
if (engine_id != -1 && error->engine[engine_id].context.pid)
|
||||||
len += scnprintf(error->error_msg + len,
|
len += scnprintf(error->error_msg + len,
|
||||||
sizeof(error->error_msg) - len,
|
sizeof(error->error_msg) - len,
|
||||||
", in %s [%d]",
|
", in %s [%d]",
|
||||||
error->engine[engine_id].comm,
|
error->engine[engine_id].context.comm,
|
||||||
error->engine[engine_id].pid);
|
error->engine[engine_id].context.pid);
|
||||||
|
|
||||||
scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
|
scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
|
||||||
", reason: %s, action: %s",
|
", reason: %s, action: %s",
|
||||||
|
Loading…
Reference in New Issue
Block a user