linux/drivers/gpu/drm/i915/i915_trace.h

456 lines
11 KiB
C
Raw Normal View History

#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _I915_TRACE_H_
#include <linux/stringify.h>
#include <linux/types.h>
#include <linux/tracepoint.h>
#include <drm/drmP.h>
#include "i915_drv.h"
#include "intel_ringbuffer.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM i915
#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
#define TRACE_INCLUDE_FILE i915_trace
/* object tracking */
TRACE_EVENT(i915_gem_object_create,
TP_PROTO(struct drm_i915_gem_object *obj),
TP_ARGS(obj),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
__field(u32, size)
),
TP_fast_assign(
__entry->obj = obj;
__entry->size = obj->base.size;
),
TP_printk("obj=%p, size=%u", __entry->obj, __entry->size)
);
TRACE_EVENT(i915_gem_object_bind,
TP_PROTO(struct drm_i915_gem_object *obj, bool mappable),
TP_ARGS(obj, mappable),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
__field(u32, offset)
__field(u32, size)
__field(bool, mappable)
),
TP_fast_assign(
__entry->obj = obj;
__entry->offset = obj->gtt_space->start;
__entry->size = obj->gtt_space->size;
__entry->mappable = mappable;
),
TP_printk("obj=%p, offset=%08x size=%x%s",
__entry->obj, __entry->offset, __entry->size,
__entry->mappable ? ", mappable" : "")
);
TRACE_EVENT(i915_gem_object_unbind,
TP_PROTO(struct drm_i915_gem_object *obj),
TP_ARGS(obj),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
__field(u32, offset)
__field(u32, size)
),
TP_fast_assign(
__entry->obj = obj;
__entry->offset = obj->gtt_space->start;
__entry->size = obj->gtt_space->size;
),
TP_printk("obj=%p, offset=%08x size=%x",
__entry->obj, __entry->offset, __entry->size)
);
TRACE_EVENT(i915_gem_object_change_domain,
TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write),
TP_ARGS(obj, old_read, old_write),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
__field(u32, read_domains)
__field(u32, write_domain)
),
TP_fast_assign(
__entry->obj = obj;
__entry->read_domains = obj->base.read_domains | (old_read << 16);
__entry->write_domain = obj->base.write_domain | (old_write << 16);
),
TP_printk("obj=%p, read=%02x=>%02x, write=%02x=>%02x",
__entry->obj,
__entry->read_domains >> 16,
__entry->read_domains & 0xffff,
__entry->write_domain >> 16,
__entry->write_domain & 0xffff)
);
TRACE_EVENT(i915_gem_object_pwrite,
TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len),
TP_ARGS(obj, offset, len),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
__field(u32, offset)
__field(u32, len)
),
TP_fast_assign(
__entry->obj = obj;
__entry->offset = offset;
__entry->len = len;
),
TP_printk("obj=%p, offset=%u, len=%u",
__entry->obj, __entry->offset, __entry->len)
);
TRACE_EVENT(i915_gem_object_pread,
TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len),
TP_ARGS(obj, offset, len),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
__field(u32, offset)
__field(u32, len)
),
TP_fast_assign(
__entry->obj = obj;
__entry->offset = offset;
__entry->len = len;
),
TP_printk("obj=%p, offset=%u, len=%u",
__entry->obj, __entry->offset, __entry->len)
);
TRACE_EVENT(i915_gem_object_fault,
TP_PROTO(struct drm_i915_gem_object *obj, u32 index, bool gtt, bool write),
TP_ARGS(obj, index, gtt, write),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
__field(u32, index)
__field(bool, gtt)
__field(bool, write)
),
TP_fast_assign(
__entry->obj = obj;
__entry->index = index;
__entry->gtt = gtt;
__entry->write = write;
),
TP_printk("obj=%p, %s index=%u %s",
__entry->obj,
__entry->gtt ? "GTT" : "CPU",
__entry->index,
__entry->write ? ", writable" : "")
);
DECLARE_EVENT_CLASS(i915_gem_object,
TP_PROTO(struct drm_i915_gem_object *obj),
TP_ARGS(obj),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
),
TP_fast_assign(
__entry->obj = obj;
),
TP_printk("obj=%p", __entry->obj)
);
DEFINE_EVENT(i915_gem_object, i915_gem_object_clflush,
TP_PROTO(struct drm_i915_gem_object *obj),
TP_ARGS(obj)
);
DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy,
TP_PROTO(struct drm_i915_gem_object *obj),
TP_ARGS(obj)
);
TRACE_EVENT(i915_gem_evict,
TP_PROTO(struct drm_device *dev, u32 size, u32 align, bool mappable),
TP_ARGS(dev, size, align, mappable),
TP_STRUCT__entry(
__field(u32, dev)
__field(u32, size)
__field(u32, align)
__field(bool, mappable)
),
TP_fast_assign(
__entry->dev = dev->primary->index;
__entry->size = size;
__entry->align = align;
__entry->mappable = mappable;
),
TP_printk("dev=%d, size=%d, align=%d %s",
__entry->dev, __entry->size, __entry->align,
__entry->mappable ? ", mappable" : "")
);
TRACE_EVENT(i915_gem_evict_everything,
drm/i915: Track unbound pages When dealing with a working set larger than the GATT, or even the mappable aperture when touching through the GTT, we end up with evicting objects only to rebind them at a new offset again later. Moving an object into and out of the GTT requires clflushing the pages, thus causing a double-clflush penalty for rebinding. To avoid having to clflush on rebinding, we can track the pages as they are evicted from the GTT and only relinquish those pages on memory pressure. As usual, if it were not for the handling of out-of-memory condition and having to manually shrink our own bo caches, it would be a net reduction of code. Alas. Note: The patch also contains a few changes to the last-hope evict_everything logic in i916_gem_execbuffer.c - we no longer try to only evict the purgeable stuff in a first try (since that's superflous and only helps in OOM corner-cases, not fragmented-gtt trashing situations). Also, the extraction of the get_pages retry loop from bind_to_gtt (and other callsites) to get_pages should imo have been a separate patch. v2: Ditch the newly added put_pages (for unbound objects only) in i915_gem_reset. A quick irc discussion hasn't revealed any important reason for this, so if we need this, I'd like to have a git blame'able explanation for it. v3: Undo the s/drm_malloc_ab/kmalloc/ in get_pages that Chris noticed. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Split out code movements and rant a bit in the commit message with a few Notes. Done v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-08-20 09:40:46 +00:00
TP_PROTO(struct drm_device *dev),
TP_ARGS(dev),
TP_STRUCT__entry(
__field(u32, dev)
),
TP_fast_assign(
__entry->dev = dev->primary->index;
),
drm/i915: Track unbound pages When dealing with a working set larger than the GATT, or even the mappable aperture when touching through the GTT, we end up with evicting objects only to rebind them at a new offset again later. Moving an object into and out of the GTT requires clflushing the pages, thus causing a double-clflush penalty for rebinding. To avoid having to clflush on rebinding, we can track the pages as they are evicted from the GTT and only relinquish those pages on memory pressure. As usual, if it were not for the handling of out-of-memory condition and having to manually shrink our own bo caches, it would be a net reduction of code. Alas. Note: The patch also contains a few changes to the last-hope evict_everything logic in i916_gem_execbuffer.c - we no longer try to only evict the purgeable stuff in a first try (since that's superflous and only helps in OOM corner-cases, not fragmented-gtt trashing situations). Also, the extraction of the get_pages retry loop from bind_to_gtt (and other callsites) to get_pages should imo have been a separate patch. v2: Ditch the newly added put_pages (for unbound objects only) in i915_gem_reset. A quick irc discussion hasn't revealed any important reason for this, so if we need this, I'd like to have a git blame'able explanation for it. v3: Undo the s/drm_malloc_ab/kmalloc/ in get_pages that Chris noticed. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> [danvet: Split out code movements and rant a bit in the commit message with a few Notes. Done v2] Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2012-08-20 09:40:46 +00:00
TP_printk("dev=%d", __entry->dev)
);
TRACE_EVENT(i915_gem_ring_dispatch,
TP_PROTO(struct intel_ring_buffer *ring, u32 seqno, u32 flags),
TP_ARGS(ring, seqno, flags),
TP_STRUCT__entry(
__field(u32, dev)
__field(u32, ring)
__field(u32, seqno)
__field(u32, flags)
),
TP_fast_assign(
__entry->dev = ring->dev->primary->index;
__entry->ring = ring->id;
__entry->seqno = seqno;
__entry->flags = flags;
i915_trace_irq_get(ring, seqno);
),
TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
__entry->dev, __entry->ring, __entry->seqno, __entry->flags)
);
TRACE_EVENT(i915_gem_ring_flush,
TP_PROTO(struct intel_ring_buffer *ring, u32 invalidate, u32 flush),
TP_ARGS(ring, invalidate, flush),
TP_STRUCT__entry(
__field(u32, dev)
__field(u32, ring)
__field(u32, invalidate)
__field(u32, flush)
),
TP_fast_assign(
__entry->dev = ring->dev->primary->index;
__entry->ring = ring->id;
__entry->invalidate = invalidate;
__entry->flush = flush;
),
TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x",
__entry->dev, __entry->ring,
__entry->invalidate, __entry->flush)
);
DECLARE_EVENT_CLASS(i915_gem_request,
TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
TP_ARGS(ring, seqno),
TP_STRUCT__entry(
__field(u32, dev)
__field(u32, ring)
__field(u32, seqno)
),
TP_fast_assign(
__entry->dev = ring->dev->primary->index;
__entry->ring = ring->id;
__entry->seqno = seqno;
),
TP_printk("dev=%u, ring=%u, seqno=%u",
__entry->dev, __entry->ring, __entry->seqno)
);
DEFINE_EVENT(i915_gem_request, i915_gem_request_add,
TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
TP_ARGS(ring, seqno)
);
DEFINE_EVENT(i915_gem_request, i915_gem_request_complete,
TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
TP_ARGS(ring, seqno)
);
DEFINE_EVENT(i915_gem_request, i915_gem_request_retire,
TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
TP_ARGS(ring, seqno)
);
TRACE_EVENT(i915_gem_request_wait_begin,
TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
TP_ARGS(ring, seqno),
TP_STRUCT__entry(
__field(u32, dev)
__field(u32, ring)
__field(u32, seqno)
__field(bool, blocking)
),
/* NB: the blocking information is racy since mutex_is_locked
* doesn't check that the current thread holds the lock. The only
* other option would be to pass the boolean information of whether
* or not the class was blocking down through the stack which is
* less desirable.
*/
TP_fast_assign(
__entry->dev = ring->dev->primary->index;
__entry->ring = ring->id;
__entry->seqno = seqno;
__entry->blocking = mutex_is_locked(&ring->dev->struct_mutex);
),
TP_printk("dev=%u, ring=%u, seqno=%u, blocking=%s",
__entry->dev, __entry->ring, __entry->seqno,
__entry->blocking ? "yes (NB)" : "no")
);
DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end,
TP_PROTO(struct intel_ring_buffer *ring, u32 seqno),
TP_ARGS(ring, seqno)
);
DECLARE_EVENT_CLASS(i915_ring,
TP_PROTO(struct intel_ring_buffer *ring),
TP_ARGS(ring),
TP_STRUCT__entry(
__field(u32, dev)
__field(u32, ring)
),
TP_fast_assign(
__entry->dev = ring->dev->primary->index;
__entry->ring = ring->id;
),
TP_printk("dev=%u, ring=%u", __entry->dev, __entry->ring)
);
DEFINE_EVENT(i915_ring, i915_ring_wait_begin,
TP_PROTO(struct intel_ring_buffer *ring),
TP_ARGS(ring)
);
DEFINE_EVENT(i915_ring, i915_ring_wait_end,
TP_PROTO(struct intel_ring_buffer *ring),
TP_ARGS(ring)
);
TRACE_EVENT(i915_flip_request,
TP_PROTO(int plane, struct drm_i915_gem_object *obj),
TP_ARGS(plane, obj),
TP_STRUCT__entry(
__field(int, plane)
__field(struct drm_i915_gem_object *, obj)
),
TP_fast_assign(
__entry->plane = plane;
__entry->obj = obj;
),
TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj)
);
TRACE_EVENT(i915_flip_complete,
TP_PROTO(int plane, struct drm_i915_gem_object *obj),
TP_ARGS(plane, obj),
TP_STRUCT__entry(
__field(int, plane)
__field(struct drm_i915_gem_object *, obj)
),
TP_fast_assign(
__entry->plane = plane;
__entry->obj = obj;
),
TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj)
);
TRACE_EVENT(i915_reg_rw,
TP_PROTO(bool write, u32 reg, u64 val, int len),
TP_ARGS(write, reg, val, len),
TP_STRUCT__entry(
__field(u64, val)
__field(u32, reg)
__field(u16, write)
__field(u16, len)
),
TP_fast_assign(
__entry->val = (u64)val;
__entry->reg = reg;
__entry->write = write;
__entry->len = len;
),
TP_printk("%s reg=0x%x, len=%d, val=(0x%x, 0x%x)",
__entry->write ? "write" : "read",
__entry->reg, __entry->len,
(u32)(__entry->val & 0xffffffff),
(u32)(__entry->val >> 32))
);
TRACE_EVENT(intel_gpu_freq_change,
TP_PROTO(u32 freq),
TP_ARGS(freq),
TP_STRUCT__entry(
__field(u32, freq)
),
TP_fast_assign(
__entry->freq = freq;
),
TP_printk("new_freq=%u", __entry->freq)
);
#endif /* _I915_TRACE_H_ */
/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#include <trace/define_trace.h>