drm/i915: Cache the error string

Currently, we convert the error state into a string every time we read
from sysfs (and sysfs reads in page size (4KiB) chunks). We do try to
window the string and only capture the portion that is being read, but
that means that we must always convert up to the window to find the
start. For a very large error state bordering on EXEC_OBJECT_CAPTURE
abuse, this is noticeable as it degrades to O(N^2)!

As we do not have a convenient hook for sysfs open(), and we would like
to keep the lazy conversion into a string, do the conversion of the
whole string on the first read and keep the string until the error state
is freed.

v2: Don't double advance simple_read_from_buffer
v3: Due to extreme pain of lack of vrealloc, use a scatterlist
v4: Keep the forward iterator loosely cached
v5: Stylistic improvements to reduce patch size

Reported-by: Jason Ekstrand <jason@jlekstrand.net>
Testcase: igt/gem_exec_capture/many*
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181123132325.26541-1-chris@chris-wilson.co.uk
This commit is contained in:
Chris Wilson 2018-11-23 13:23:25 +00:00
parent 16c36c4cb8
commit 0e39037b31
4 changed files with 247 additions and 179 deletions

View File

@ -943,30 +943,30 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
static ssize_t gpu_state_read(struct file *file, char __user *ubuf,
size_t count, loff_t *pos)
{
struct i915_gpu_state *error = file->private_data;
struct drm_i915_error_state_buf str;
struct i915_gpu_state *error;
ssize_t ret;
loff_t tmp;
void *buf;
error = file->private_data;
if (!error)
return 0;
ret = i915_error_state_buf_init(&str, error->i915, count, *pos);
if (ret)
return ret;
/* Bounce buffer required because of kernfs __user API convenience. */
buf = kmalloc(count, GFP_KERNEL);
if (!buf)
return -ENOMEM;
ret = i915_error_state_to_str(&str, error);
if (ret)
ret = i915_gpu_state_copy_to_buffer(error, buf, *pos, count);
if (ret <= 0)
goto out;
tmp = 0;
ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes);
if (ret < 0)
goto out;
if (!copy_to_user(ubuf, buf, ret))
*pos += ret;
else
ret = -EFAULT;
*pos = str.start + ret;
out:
i915_error_state_buf_release(&str);
kfree(buf);
return ret;
}

View File

@ -27,11 +27,14 @@
*
*/
#include <linux/utsname.h>
#include <linux/stop_machine.h>
#include <linux/zlib.h>
#include <drm/drm_print.h>
#include <linux/ascii85.h>
#include <linux/nmi.h>
#include <linux/scatterlist.h>
#include <linux/stop_machine.h>
#include <linux/utsname.h>
#include <linux/zlib.h>
#include <drm/drm_print.h>
#include "i915_gpu_error.h"
#include "i915_drv.h"
@ -77,112 +80,110 @@ static const char *purgeable_flag(int purgeable)
return purgeable ? " purgeable" : "";
}
static bool __i915_error_ok(struct drm_i915_error_state_buf *e)
static void __sg_set_buf(struct scatterlist *sg,
void *addr, unsigned int len, loff_t it)
{
if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) {
e->err = -ENOSPC;
return false;
}
if (e->bytes == e->size - 1 || e->err)
return false;
return true;
sg->page_link = (unsigned long)virt_to_page(addr);
sg->offset = offset_in_page(addr);
sg->length = len;
sg->dma_address = it;
}
static bool __i915_error_seek(struct drm_i915_error_state_buf *e,
unsigned len)
static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len)
{
if (e->pos + len <= e->start) {
e->pos += len;
if (!len)
return false;
if (e->bytes + len + 1 <= e->size)
return true;
if (e->bytes) {
__sg_set_buf(e->cur++, e->buf, e->bytes, e->iter);
e->iter += e->bytes;
e->buf = NULL;
e->bytes = 0;
}
/* First vsnprintf needs to fit in its entirety for memmove */
if (len >= e->size) {
e->err = -EIO;
return false;
}
if (e->cur == e->end) {
struct scatterlist *sgl;
return true;
}
static void __i915_error_advance(struct drm_i915_error_state_buf *e,
unsigned len)
{
/* If this is first printf in this window, adjust it so that
* start position matches start of the buffer
*/
if (e->pos < e->start) {
const size_t off = e->start - e->pos;
/* Should not happen but be paranoid */
if (off > len || e->bytes) {
e->err = -EIO;
return;
sgl = (typeof(sgl))__get_free_page(GFP_KERNEL);
if (!sgl) {
e->err = -ENOMEM;
return false;
}
memmove(e->buf, e->buf + off, len - off);
e->bytes = len - off;
e->pos = e->start;
return;
if (e->cur) {
e->cur->offset = 0;
e->cur->length = 0;
e->cur->page_link =
(unsigned long)sgl | SG_CHAIN;
} else {
e->sgl = sgl;
}
e->cur = sgl;
e->end = sgl + SG_MAX_SINGLE_ALLOC - 1;
}
e->bytes += len;
e->pos += len;
e->size = ALIGN(len + 1, SZ_64K);
e->buf = kmalloc(e->size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
if (!e->buf) {
e->size = PAGE_ALIGN(len + 1);
e->buf = kmalloc(e->size, GFP_KERNEL);
}
if (!e->buf) {
e->err = -ENOMEM;
return false;
}
return true;
}
__printf(2, 0)
static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
const char *f, va_list args)
const char *fmt, va_list args)
{
unsigned len;
va_list ap;
int len;
if (!__i915_error_ok(e))
if (e->err)
return;
/* Seek the first printf which is hits start position */
if (e->pos < e->start) {
va_list tmp;
va_copy(tmp, args);
len = vsnprintf(NULL, 0, f, tmp);
va_end(tmp);
if (!__i915_error_seek(e, len))
return;
va_copy(ap, args);
len = vsnprintf(NULL, 0, fmt, ap);
va_end(ap);
if (len <= 0) {
e->err = len;
return;
}
len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args);
if (len >= e->size - e->bytes)
len = e->size - e->bytes - 1;
if (!__i915_error_grow(e, len))
return;
__i915_error_advance(e, len);
GEM_BUG_ON(e->bytes >= e->size);
len = vscnprintf(e->buf + e->bytes, e->size - e->bytes, fmt, args);
if (len < 0) {
e->err = len;
return;
}
e->bytes += len;
}
static void i915_error_puts(struct drm_i915_error_state_buf *e,
const char *str)
static void i915_error_puts(struct drm_i915_error_state_buf *e, const char *str)
{
unsigned len;
if (!__i915_error_ok(e))
if (e->err || !str)
return;
len = strlen(str);
if (!__i915_error_grow(e, len))
return;
/* Seek the first printf which is hits start position */
if (e->pos < e->start) {
if (!__i915_error_seek(e, len))
return;
}
if (len >= e->size - e->bytes)
len = e->size - e->bytes - 1;
GEM_BUG_ON(e->bytes + len > e->size);
memcpy(e->buf + e->bytes, str, len);
__i915_error_advance(e, len);
e->bytes += len;
}
#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
@ -268,6 +269,8 @@ static int compress_page(struct compress *c,
if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK)
return -EIO;
touch_nmi_watchdog();
} while (zstream->avail_in);
/* Fallback to uncompressed if we increase size? */
@ -635,22 +638,30 @@ static void err_print_uc(struct drm_i915_error_state_buf *m,
print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log);
}
int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
const struct i915_gpu_state *error)
static void err_free_sgl(struct scatterlist *sgl)
{
while (sgl) {
struct scatterlist *sg;
for (sg = sgl; !sg_is_chain(sg); sg++) {
kfree(sg_virt(sg));
if (sg_is_last(sg))
break;
}
sg = sg_is_last(sg) ? NULL : sg_chain_ptr(sg);
free_page((unsigned long)sgl);
sgl = sg;
}
}
static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
struct i915_gpu_state *error)
{
struct drm_i915_private *dev_priv = m->i915;
struct drm_i915_error_object *obj;
struct timespec64 ts;
int i, j;
if (!error) {
err_printf(m, "No error state collected\n");
return 0;
}
if (IS_ERR(error))
return PTR_ERR(error);
if (*error->error_msg)
err_printf(m, "%s\n", error->error_msg);
err_printf(m, "Kernel: %s\n", init_utsname()->release);
@ -683,12 +694,12 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_printf(m, "Reset count: %u\n", error->reset_count);
err_printf(m, "Suspend count: %u\n", error->suspend_count);
err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
err_print_pciid(m, error->i915);
err_print_pciid(m, m->i915);
err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
if (HAS_CSR(dev_priv)) {
struct intel_csr *csr = &dev_priv->csr;
if (HAS_CSR(m->i915)) {
struct intel_csr *csr = &m->i915->csr;
err_printf(m, "DMC loaded: %s\n",
yesno(csr->dmc_payload != NULL));
@ -708,22 +719,23 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
err_printf(m, "CCID: 0x%08x\n", error->ccid);
err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings);
err_printf(m, "Missed interrupts: 0x%08lx\n",
m->i915->gpu_error.missed_irq_rings);
for (i = 0; i < error->nfence; i++)
err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]);
if (INTEL_GEN(dev_priv) >= 6) {
if (INTEL_GEN(m->i915) >= 6) {
err_printf(m, "ERROR: 0x%08x\n", error->error);
if (INTEL_GEN(dev_priv) >= 8)
if (INTEL_GEN(m->i915) >= 8)
err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
error->fault_data1, error->fault_data0);
err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
}
if (IS_GEN7(dev_priv))
if (IS_GEN7(m->i915))
err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
@ -745,7 +757,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
len += scnprintf(buf + len, sizeof(buf), "%s%s",
first ? "" : ", ",
dev_priv->engine[j]->name);
m->i915->engine[j]->name);
first = 0;
}
scnprintf(buf + len, sizeof(buf), ")");
@ -763,7 +775,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
obj = ee->batchbuffer;
if (obj) {
err_puts(m, dev_priv->engine[i]->name);
err_puts(m, m->i915->engine[i]->name);
if (ee->context.pid)
err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)",
ee->context.comm,
@ -775,16 +787,16 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
upper_32_bits(obj->gtt_offset),
lower_32_bits(obj->gtt_offset));
print_error_obj(m, dev_priv->engine[i], NULL, obj);
print_error_obj(m, m->i915->engine[i], NULL, obj);
}
for (j = 0; j < ee->user_bo_count; j++)
print_error_obj(m, dev_priv->engine[i],
print_error_obj(m, m->i915->engine[i],
"user", ee->user_bo[j]);
if (ee->num_requests) {
err_printf(m, "%s --- %d requests\n",
dev_priv->engine[i]->name,
m->i915->engine[i]->name,
ee->num_requests);
for (j = 0; j < ee->num_requests; j++)
error_print_request(m, " ",
@ -794,10 +806,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
if (IS_ERR(ee->waiters)) {
err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
dev_priv->engine[i]->name);
m->i915->engine[i]->name);
} else if (ee->num_waiters) {
err_printf(m, "%s --- %d waiters\n",
dev_priv->engine[i]->name,
m->i915->engine[i]->name,
ee->num_waiters);
for (j = 0; j < ee->num_waiters; j++) {
err_printf(m, " seqno 0x%08x for %s [%d]\n",
@ -807,22 +819,22 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
}
}
print_error_obj(m, dev_priv->engine[i],
print_error_obj(m, m->i915->engine[i],
"ringbuffer", ee->ringbuffer);
print_error_obj(m, dev_priv->engine[i],
print_error_obj(m, m->i915->engine[i],
"HW Status", ee->hws_page);
print_error_obj(m, dev_priv->engine[i],
print_error_obj(m, m->i915->engine[i],
"HW context", ee->ctx);
print_error_obj(m, dev_priv->engine[i],
print_error_obj(m, m->i915->engine[i],
"WA context", ee->wa_ctx);
print_error_obj(m, dev_priv->engine[i],
print_error_obj(m, m->i915->engine[i],
"WA batchbuffer", ee->wa_batchbuffer);
print_error_obj(m, dev_priv->engine[i],
print_error_obj(m, m->i915->engine[i],
"NULL context", ee->default_state);
}
@ -835,43 +847,107 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
err_print_capabilities(m, &error->device_info, &error->driver_caps);
err_print_params(m, &error->params);
err_print_uc(m, &error->uc);
}
if (m->bytes == 0 && m->err)
return m->err;
static int err_print_to_sgl(struct i915_gpu_state *error)
{
struct drm_i915_error_state_buf m;
if (IS_ERR(error))
return PTR_ERR(error);
if (READ_ONCE(error->sgl))
return 0;
memset(&m, 0, sizeof(m));
m.i915 = error->i915;
__err_print_to_sgl(&m, error);
if (m.buf) {
__sg_set_buf(m.cur++, m.buf, m.bytes, m.iter);
m.bytes = 0;
m.buf = NULL;
}
if (m.cur) {
GEM_BUG_ON(m.end < m.cur);
sg_mark_end(m.cur - 1);
}
GEM_BUG_ON(m.sgl && !m.cur);
if (m.err) {
err_free_sgl(m.sgl);
return m.err;
}
if (cmpxchg(&error->sgl, NULL, m.sgl))
err_free_sgl(m.sgl);
return 0;
}
int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf,
struct drm_i915_private *i915,
size_t count, loff_t pos)
ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
char *buf, loff_t off, size_t rem)
{
memset(ebuf, 0, sizeof(*ebuf));
ebuf->i915 = i915;
struct scatterlist *sg;
size_t count;
loff_t pos;
int err;
/* We need to have enough room to store any i915_error_state printf
* so that we can move it to start position.
*/
ebuf->size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE;
ebuf->buf = kmalloc(ebuf->size,
GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
if (!error || !rem)
return 0;
if (ebuf->buf == NULL) {
ebuf->size = PAGE_SIZE;
ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
}
err = err_print_to_sgl(error);
if (err)
return err;
if (ebuf->buf == NULL) {
ebuf->size = 128;
ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
}
sg = READ_ONCE(error->fit);
if (!sg || off < sg->dma_address)
sg = error->sgl;
if (!sg)
return 0;
if (ebuf->buf == NULL)
return -ENOMEM;
pos = sg->dma_address;
count = 0;
do {
size_t len, start;
ebuf->start = pos;
if (sg_is_chain(sg)) {
sg = sg_chain_ptr(sg);
GEM_BUG_ON(sg_is_chain(sg));
}
return 0;
len = sg->length;
if (pos + len <= off) {
pos += len;
continue;
}
start = sg->offset;
if (pos < off) {
GEM_BUG_ON(off - pos > len);
len -= off - pos;
start += off - pos;
pos = off;
}
len = min(len, rem);
GEM_BUG_ON(!len || len > sg->length);
memcpy(buf, page_address(sg_page(sg)) + start, len);
count += len;
pos += len;
buf += len;
rem -= len;
if (!rem) {
WRITE_ONCE(error->fit, sg);
break;
}
} while (!sg_is_last(sg++));
return count;
}
static void i915_error_object_free(struct drm_i915_error_object *obj)
@ -944,6 +1020,7 @@ void __i915_gpu_state_free(struct kref *error_ref)
cleanup_params(error);
cleanup_uc_state(error);
err_free_sgl(error->sgl);
kfree(error);
}

View File

@ -192,6 +192,8 @@ struct i915_gpu_state {
} *active_bo[I915_NUM_ENGINES], *pinned_bo;
u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
struct i915_address_space *active_vm[I915_NUM_ENGINES];
struct scatterlist *sgl, *fit;
};
struct i915_gpu_error {
@ -298,29 +300,20 @@ struct i915_gpu_error {
struct drm_i915_error_state_buf {
struct drm_i915_private *i915;
unsigned int bytes;
unsigned int size;
struct scatterlist *sgl, *cur, *end;
char *buf;
size_t bytes;
size_t size;
loff_t iter;
int err;
u8 *buf;
loff_t start;
loff_t pos;
};
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
__printf(2, 3)
void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
const struct i915_gpu_state *gpu);
int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
struct drm_i915_private *i915,
size_t count, loff_t pos);
static inline void
i915_error_state_buf_release(struct drm_i915_error_state_buf *eb)
{
kfree(eb->buf);
}
struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
void i915_capture_error_state(struct drm_i915_private *dev_priv,
@ -334,6 +327,9 @@ i915_gpu_state_get(struct i915_gpu_state *gpu)
return gpu;
}
ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error,
char *buf, loff_t offset, size_t count);
void __i915_gpu_state_free(struct kref *kref);
static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
{

View File

@ -516,26 +516,21 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
{
struct device *kdev = kobj_to_dev(kobj);
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
struct drm_i915_error_state_buf error_str;
struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
struct i915_gpu_state *gpu;
ssize_t ret;
ret = i915_error_state_buf_init(&error_str, dev_priv, count, off);
if (ret)
return ret;
gpu = i915_first_error_state(i915);
if (gpu) {
ret = i915_gpu_state_copy_to_buffer(gpu, buf, off, count);
i915_gpu_state_put(gpu);
} else {
const char *str = "No error state collected\n";
size_t len = strlen(str);
gpu = i915_first_error_state(dev_priv);
ret = i915_error_state_to_str(&error_str, gpu);
if (ret)
goto out;
ret = count < error_str.bytes ? count : error_str.bytes;
memcpy(buf, error_str.buf, ret);
out:
i915_gpu_state_put(gpu);
i915_error_state_buf_release(&error_str);
ret = min_t(size_t, count, len - off);
memcpy(buf, str + off, ret);
}
return ret;
}