drm/i915: Add tracepoints
By adding tracepoint equivalents for WATCH_BUF/EXEC, we are able to monitor
the lifetimes of objects, requests, and significant events. These events can
then be probed using tracing frameworks such as systemtap and, in
particular, perf.

For example, to record the stack trace for every GPU stall during a run, use

  $ perf record -e i915:i915_gem_request_wait_begin -c 1 -g

And

  $ perf report

to view the results.
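
The tracepoints themselves are declared with the kernel's TRACE_EVENT() macro
in the new i915_trace.h header (its diff is not expanded on this page). A
minimal sketch of the pattern, with the field layout assumed rather than
quoted from the header:

  /*
   * Illustrative only: the real i915_trace.h is not reproduced here,
   * so the exact fields and printk format are assumptions.
   */
  #undef TRACE_SYSTEM
  #define TRACE_SYSTEM i915

  #include <linux/tracepoint.h>

  TRACE_EVENT(i915_gem_request_wait_begin,
              TP_PROTO(struct drm_device *dev, u32 seqno),
              TP_ARGS(dev, seqno),

              /* Raw data copied into the trace ring buffer at the call site. */
              TP_STRUCT__entry(
                               __field(u32, dev)
                               __field(u32, seqno)
                               ),

              TP_fast_assign(
                             __entry->dev = dev->primary->index;
                             __entry->seqno = seqno;
                             ),

              /* Human-readable rendering used by the tracing tools. */
              TP_printk("dev=%u, seqno=%u", __entry->dev, __entry->seqno)
  );

Once the driver is loaded, the events can also be enabled ad hoc through the
tracing debugfs interface (assuming debugfs is mounted at /sys/kernel/debug):

  $ echo 1 > /sys/kernel/debug/tracing/events/i915/enable
  $ cat /sys/kernel/debug/tracing/trace_pipe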

[Updated to fix compilation issues.]
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Ben Gamari <bgamari@gmail.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Chris Wilson committed Sep 23, 2009
1 parent: 74dff28 · commit: 1c5d22f
Showing 6 changed files with 447 additions and 16 deletions.
drivers/gpu/drm/i915/Makefile: 1 addition & 0 deletions
@@ -9,6 +9,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
 	  i915_gem.o \
 	  i915_gem_debug.o \
 	  i915_gem_tiling.o \
+	  i915_trace_points.o \
 	  intel_display.o \
 	  intel_crt.o \
 	  intel_lvds.o \
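
The new i915_trace_points.o object is built from i915_trace_points.c, one of
the files whose diff is not expanded on this page. By kernel convention such
a file is a one-off translation unit that instantiates the tracepoints; a
sketch of the likely contents, not the file verbatim:

  /* Conventional trace-points stub (assumed; the actual diff is not shown). */
  #include "i915_drv.h"

  #define CREATE_TRACE_POINTS
  #include "i915_trace.h"
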
drivers/gpu/drm/i915/i915_dma.c: 7 additions & 1 deletion
@@ -33,6 +33,7 @@
 #include "intel_drv.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 
 /* Really want an OS-independent resettable timer. Would like to have
  * this loop run for (eg) 3 sec, but have the timer reset every time
@@ -49,14 +50,18 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
 	u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
 	int i;
 
+	trace_i915_ring_wait_begin (dev);
+
 	for (i = 0; i < 100000; i++) {
 		ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
 		acthd = I915_READ(acthd_reg);
 		ring->space = ring->head - (ring->tail + 8);
 		if (ring->space < 0)
 			ring->space += ring->Size;
-		if (ring->space >= n)
+		if (ring->space >= n) {
+			trace_i915_ring_wait_end (dev);
 			return 0;
+		}
 
 		if (dev->primary->master) {
 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
@@ -76,6 +81,7 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
 
 	}
 
+	trace_i915_ring_wait_end (dev);
 	return -EBUSY;
 }

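The ring-wait tracepoints come as a begin/end pair bracketing the busy-wait,
so the duration of each stall can be recovered by subtracting the timestamps
of paired events. For example (subcommand names vary across perf versions):

  $ perf record -e i915:i915_ring_wait_begin -e i915:i915_ring_wait_end -a sleep 10
  $ perf script
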
drivers/gpu/drm/i915/i915_gem.c: 106 additions & 13 deletions
@@ -29,6 +29,7 @@
 #include "drm.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 #include "intel_drv.h"
 #include <linux/swap.h>
 #include <linux/pci.h>
@@ -1618,8 +1619,14 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
 
 		if ((obj->write_domain & flush_domains) ==
 		    obj->write_domain) {
+			uint32_t old_write_domain = obj->write_domain;
+
 			obj->write_domain = 0;
 			i915_gem_object_move_to_active(obj, seqno);
+
+			trace_i915_gem_object_change_domain(obj,
+							    obj->read_domains,
+							    old_write_domain);
 		}
 	}

@@ -1667,6 +1674,8 @@ i915_gem_retire_request(struct drm_device *dev,
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
+	trace_i915_gem_request_retire(dev, request->seqno);
+
 	/* Move any buffers on the active list that are no longer referenced
 	 * by the ringbuffer to the flushing/inactive lists as appropriate.
 	 */
@@ -1810,6 +1819,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
 			i915_driver_irq_postinstall(dev);
 		}
 
+		trace_i915_gem_request_wait_begin(dev, seqno);
+
 		dev_priv->mm.waiting_gem_seqno = seqno;
 		i915_user_irq_get(dev);
 		ret = wait_event_interruptible(dev_priv->irq_queue,
@@ -1818,6 +1829,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
 					       atomic_read(&dev_priv->mm.wedged));
 		i915_user_irq_put(dev);
 		dev_priv->mm.waiting_gem_seqno = 0;
+
+		trace_i915_gem_request_wait_end(dev, seqno);
 	}
 	if (atomic_read(&dev_priv->mm.wedged))
 		ret = -EIO;
@@ -1850,6 +1863,8 @@ i915_gem_flush(struct drm_device *dev,
 	DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
 		  invalidate_domains, flush_domains);
 #endif
+	trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno,
+				     invalidate_domains, flush_domains);
 
 	if (flush_domains & I915_GEM_DOMAIN_CPU)
 		drm_agp_chipset_flush(dev);
@@ -2003,6 +2018,8 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 	if (!list_empty(&obj_priv->list))
 		list_del_init(&obj_priv->list);
 
+	trace_i915_gem_object_unbind(obj);
+
 	return 0;
 }

@@ -2452,6 +2469,8 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
 	else
 		i830_write_fence_reg(reg);
 
+	trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode);
+
 	return 0;
 }

@@ -2650,6 +2669,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
 	BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
 	BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
 
+	trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
+
 	return 0;
 }

@@ -2665,6 +2686,8 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
 	if (obj_priv->pages == NULL)
 		return;
 
+	trace_i915_gem_object_clflush(obj);
+
 	drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
 }

@@ -2674,43 +2697,62 @@
 {
 	struct drm_device *dev = obj->dev;
 	uint32_t seqno;
+	uint32_t old_write_domain;
 
 	if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
 		return;
 
 	/* Queue the GPU write cache flushing we need. */
+	old_write_domain = obj->write_domain;
 	i915_gem_flush(dev, 0, obj->write_domain);
 	seqno = i915_add_request(dev, NULL, obj->write_domain);
 	obj->write_domain = 0;
 	i915_gem_object_move_to_active(obj, seqno);
+
+	trace_i915_gem_object_change_domain(obj,
+					    obj->read_domains,
+					    old_write_domain);
 }
 
 /** Flushes the GTT write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
 {
+	uint32_t old_write_domain;
+
 	if (obj->write_domain != I915_GEM_DOMAIN_GTT)
 		return;
 
 	/* No actual flushing is required for the GTT write domain. Writes
 	 * to it immediately go to main memory as far as we know, so there's
 	 * no chipset flush. It also doesn't land in render cache.
 	 */
+	old_write_domain = obj->write_domain;
 	obj->write_domain = 0;
+
+	trace_i915_gem_object_change_domain(obj,
+					    obj->read_domains,
+					    old_write_domain);
 }
 
 /** Flushes the CPU write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	uint32_t old_write_domain;
 
 	if (obj->write_domain != I915_GEM_DOMAIN_CPU)
 		return;
 
 	i915_gem_clflush_object(obj);
 	drm_agp_chipset_flush(dev);
+	old_write_domain = obj->write_domain;
 	obj->write_domain = 0;
+
+	trace_i915_gem_object_change_domain(obj,
+					    obj->read_domains,
+					    old_write_domain);
 }
 
 /**
@@ -2723,6 +2765,7 @@ int
 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 {
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
 	/* Not valid to be called on unbound objects. */
@@ -2735,6 +2778,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 	if (ret != 0)
 		return ret;
 
+	old_write_domain = obj->write_domain;
+	old_read_domains = obj->read_domains;
+
 	/* If we're writing through the GTT domain, then CPU and GPU caches
 	 * will need to be invalidated at next use.
 	 */
@@ -2753,6 +2799,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 		obj_priv->dirty = 1;
 	}
 
+	trace_i915_gem_object_change_domain(obj,
+					    old_read_domains,
+					    old_write_domain);
+
 	return 0;
 }

@@ -2765,6 +2815,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 static int
 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
 {
+	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
 	i915_gem_object_flush_gpu_write_domain(obj);
@@ -2780,6 +2831,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
 	 */
 	i915_gem_object_set_to_full_cpu_read_domain(obj);
 
+	old_write_domain = obj->write_domain;
+	old_read_domains = obj->read_domains;
+
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
 		i915_gem_clflush_object(obj);
@@ -2800,6 +2854,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
 		obj->write_domain = I915_GEM_DOMAIN_CPU;
 	}
 
+	trace_i915_gem_object_change_domain(obj,
+					    old_read_domains,
+					    old_write_domain);
+
 	return 0;
 }

@@ -2921,6 +2979,7 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	uint32_t invalidate_domains = 0;
 	uint32_t flush_domains = 0;
+	uint32_t old_read_domains;
 
 	BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
 	BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
@@ -2967,6 +3026,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
 		i915_gem_clflush_object(obj);
 	}
 
+	old_read_domains = obj->read_domains;
+
 	/* The actual obj->write_domain will be updated with
 	 * pending_write_domain after we emit the accumulated flush for all
 	 * of our domain changes in execbuffers (which clears objects'
@@ -2985,6 +3046,10 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
 		 obj->read_domains, obj->write_domain,
 		 dev->invalidate_domains, dev->flush_domains);
 #endif
+
+	trace_i915_gem_object_change_domain(obj,
+					    old_read_domains,
+					    obj->write_domain);
 }
 
 /**
@@ -3037,6 +3102,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
 					  uint64_t offset, uint64_t size)
 {
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	uint32_t old_read_domains;
 	int i, ret;
 
 	if (offset == 0 && size == obj->size)
@@ -3083,8 +3149,13 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
 	 */
 	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
 
+	old_read_domains = obj->read_domains;
 	obj->read_domains |= I915_GEM_DOMAIN_CPU;
 
+	trace_i915_gem_object_change_domain(obj,
+					    old_read_domains,
+					    obj->write_domain);
+
 	return 0;
 }

@@ -3282,6 +3353,8 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
 	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
 	exec_len = (uint32_t) exec->batch_len;
 
+	trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno);
+
 	count = nbox ? nbox : 1;
 
 	for (i = 0; i < count; i++) {
@@ -3660,8 +3733,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
 	for (i = 0; i < args->buffer_count; i++) {
 		struct drm_gem_object *obj = object_list[i];
+		uint32_t old_write_domain = obj->write_domain;
 
 		obj->write_domain = obj->pending_write_domain;
+		trace_i915_gem_object_change_domain(obj,
+						    obj->read_domains,
+						    old_write_domain);
 	}
 
 	i915_verify_inactive(dev, __FILE__, __LINE__);
@@ -4050,6 +4127,8 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 	INIT_LIST_HEAD(&obj_priv->fence_list);
 	obj_priv->madv = I915_MADV_WILLNEED;
 
+	trace_i915_gem_object_create(obj);
+
 	return 0;
 }

@@ -4058,6 +4137,8 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 	struct drm_device *dev = obj->dev;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
+	trace_i915_gem_object_destroy(obj);
+
 	while (obj_priv->pin_count > 0)
 		i915_gem_object_unpin(obj);

@@ -4186,24 +4267,36 @@ i915_gem_idle(struct drm_device *dev)
 	 * the GPU domains and just stuff them onto inactive.
 	 */
 	while (!list_empty(&dev_priv->mm.active_list)) {
-		struct drm_i915_gem_object *obj_priv;
-
-		obj_priv = list_first_entry(&dev_priv->mm.active_list,
-					    struct drm_i915_gem_object,
-					    list);
-		obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-		i915_gem_object_move_to_inactive(obj_priv->obj);
+		struct drm_gem_object *obj;
+		uint32_t old_write_domain;
+
+		obj = list_first_entry(&dev_priv->mm.active_list,
+				       struct drm_i915_gem_object,
+				       list)->obj;
+		old_write_domain = obj->write_domain;
+		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+		i915_gem_object_move_to_inactive(obj);
+
+		trace_i915_gem_object_change_domain(obj,
+						    obj->read_domains,
+						    old_write_domain);
 	}
 	spin_unlock(&dev_priv->mm.active_list_lock);
 
 	while (!list_empty(&dev_priv->mm.flushing_list)) {
-		struct drm_i915_gem_object *obj_priv;
-
-		obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
-					    struct drm_i915_gem_object,
-					    list);
-		obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-		i915_gem_object_move_to_inactive(obj_priv->obj);
+		struct drm_gem_object *obj;
+		uint32_t old_write_domain;
+
+		obj = list_first_entry(&dev_priv->mm.flushing_list,
+				       struct drm_i915_gem_object,
+				       list)->obj;
+		old_write_domain = obj->write_domain;
+		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+		i915_gem_object_move_to_inactive(obj);
+
+		trace_i915_gem_object_change_domain(obj,
+						    obj->read_domains,
+						    old_write_domain);
 	}


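For quick experiments without perf, events can be consumed straight from the
tracing ring buffer. A minimal userspace sketch, assuming debugfs is mounted
at /sys/kernel/debug and the i915 events have been enabled as shown earlier:

  #include <stdio.h>
  #include <stdlib.h>

  int main(void)
  {
          /* trace_pipe blocks until events arrive and consumes them as read. */
          FILE *f = fopen("/sys/kernel/debug/tracing/trace_pipe", "r");
          char line[512];

          if (!f) {
                  perror("trace_pipe");
                  return EXIT_FAILURE;
          }
          while (fgets(line, sizeof(line), f))
                  fputs(line, stdout);    /* one formatted event per line */
          fclose(f);
          return 0;
  }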
