drm/i915: Preallocate next seqno before touching the ring
Based on the work by Mika Kuoppala, we realised that we need to handle
seqno wraparound prior to committing our changes to the ring. The most
obvious point then is to grab the seqno inside intel_ring_begin(), and
then to reuse that seqno for all ring operations until the next request.
As intel_ring_begin() can fail, the callers must already be prepared to
handle such failure and so we can safely add further checks.
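
As a rough sketch of the resulting flow (the names mirror intel_ring_begin(),
intel_ring_alloc_seqno() and i915_gem_get_seqno() from the diff below, but the
types and helpers here are simplified stand-ins, not the kernel code):

#include <stdint.h>
#include <stdio.h>

struct ring {
	uint32_t outstanding_lazy_request;	/* 0 means no seqno reserved yet */
};

static uint32_t next_seqno = 1;

/* Stand-in for i915_gem_get_seqno(): hand out the next seqno, keeping 0 reserved. */
static int get_seqno(uint32_t *seqno)
{
	if (next_seqno == 0)
		next_seqno = 1;	/* the driver also idles the GPU here on wrap */
	*seqno = next_seqno++;
	return 0;
}

/* Stand-in for intel_ring_alloc_seqno(): reserve once, reuse until the request is emitted. */
static int ring_alloc_seqno(struct ring *ring)
{
	if (ring->outstanding_lazy_request)
		return 0;
	return get_seqno(&ring->outstanding_lazy_request);
}

/* Stand-in for intel_ring_begin(): the seqno now exists before any dword is emitted. */
static int ring_begin(struct ring *ring, int num_dwords)
{
	(void)num_dwords;	/* space/wrap checks elided */
	return ring_alloc_seqno(ring);
}

int main(void)
{
	struct ring ring = { 0 };

	if (ring_begin(&ring, 4) == 0)
		printf("seqno %u reserved before touching the ring\n",
		       ring.outstanding_lazy_request);
	return 0;
}

Because the allocation happens before anything is written to the ring, a
failure (for instance from idling the GPU during wrap handling) leaves the
ring untouched, which is why the existing intel_ring_begin() error handling
in the callers is sufficient.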

This patch looks like it should be split up into the interface
changes and the tweaks to move seqno wrapping from the execbuffer into
the core seqno increment. However, I found no easy way to break it into
incremental steps without introducing further broken behaviour.

v2: Mika found a silly mistake and a subtle error in the existing code:
inside i915_gem_retire_requests() we were resetting the sync_seqno of
the target ring based on the seqno from this ring, but the two are only
related by the order of their allocation, not of their retirement. We
were therefore applying the rings-already-synchronised optimisation too
early; fortunately, the only real casualty is the handling of seqno
wrapping.

v3: Do not forget to reset the sync_seqno upon module reinitialisation,
e.g. on resume.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=863861
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> [v2]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Chris Wilson authored and Daniel Vetter committed Nov 29, 2012
1 parent 45e2b5f commit 9d77309
Showing 6 changed files with 89 additions and 82 deletions.
5 changes: 2 additions & 3 deletions drivers/gpu/drm/i915/i915_drv.h
@@ -1417,8 +1417,7 @@ int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
int i915_gem_object_sync(struct drm_i915_gem_object *obj,
struct intel_ring_buffer *to);
void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
struct intel_ring_buffer *ring,
u32 seqno);
struct intel_ring_buffer *ring);

int i915_gem_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
@@ -1436,7 +1435,7 @@ i915_seqno_passed(uint32_t seq1, uint32_t seq2)
return (int32_t)(seq1 - seq2) >= 0;
}

u32 i915_gem_next_request_seqno(struct intel_ring_buffer *ring);
extern int i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);

int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj);
int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
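
For context, the wraparound-safe comparison kept in this header can be
exercised on its own. The following small program copies the shape of
i915_seqno_passed() above and shows why signed subtraction orders seqnos
correctly across a 32-bit wrap; the GPU semaphores have no such trick (per the
comment removed from the execbuffer below), which is what the new wrap
handling works around.

#include <assert.h>
#include <stdint.h>

/* Same shape as i915_seqno_passed() in the header above. */
static int seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
	/* Ordinary ordering. */
	assert(seqno_passed(100, 50));
	assert(!seqno_passed(50, 100));

	/* Across the wrap: 5 was allocated after 0xfffffff0. */
	assert(seqno_passed(5, 0xfffffff0u));
	assert(!seqno_passed(0xfffffff0u, 5));
	return 0;
}
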
74 changes: 47 additions & 27 deletions drivers/gpu/drm/i915/i915_gem.c
@@ -1857,11 +1857,11 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)

void
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
struct intel_ring_buffer *ring,
u32 seqno)
struct intel_ring_buffer *ring)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
u32 seqno = intel_ring_get_seqno(ring);

BUG_ON(ring == NULL);
obj->ring = ring;
@@ -1922,26 +1922,54 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
WARN_ON(i915_verify_lists(dev));
}

static u32
i915_gem_get_seqno(struct drm_device *dev)
static int
i915_gem_handle_seqno_wrap(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
u32 seqno = dev_priv->next_seqno;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring;
int ret, i, j;

/* reserve 0 for non-seqno */
if (++dev_priv->next_seqno == 0)
dev_priv->next_seqno = 1;
/* The hardware uses various monotonic 32-bit counters, if we
* detect that they will wraparound we need to idle the GPU
* and reset those counters.
*/
ret = 0;
for_each_ring(ring, dev_priv, i) {
for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
ret |= ring->sync_seqno[j] != 0;
}
if (ret == 0)
return ret;

return seqno;
ret = i915_gpu_idle(dev);
if (ret)
return ret;

i915_gem_retire_requests(dev);
for_each_ring(ring, dev_priv, i) {
for (j = 0; j < ARRAY_SIZE(ring->sync_seqno); j++)
ring->sync_seqno[j] = 0;
}

return 0;
}

u32
i915_gem_next_request_seqno(struct intel_ring_buffer *ring)
int
i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
{
if (ring->outstanding_lazy_request == 0)
ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev);
struct drm_i915_private *dev_priv = dev->dev_private;

/* reserve 0 for non-seqno */
if (dev_priv->next_seqno == 0) {
int ret = i915_gem_handle_seqno_wrap(dev);
if (ret)
return ret;

return ring->outstanding_lazy_request;
dev_priv->next_seqno = 1;
}

*seqno = dev_priv->next_seqno++;
return 0;
}

int
@@ -1952,7 +1980,6 @@ i915_add_request(struct intel_ring_buffer *ring,
drm_i915_private_t *dev_priv = ring->dev->dev_private;
struct drm_i915_gem_request *request;
u32 request_ring_position;
u32 seqno;
int was_empty;
int ret;

@@ -1971,7 +1998,6 @@ i915_add_request(struct intel_ring_buffer *ring,
if (request == NULL)
return -ENOMEM;

seqno = i915_gem_next_request_seqno(ring);

/* Record the position of the start of the request so that
* should we detect the updated seqno part-way through the
@@ -1980,15 +2006,13 @@ i915_add_request(struct intel_ring_buffer *ring,
*/
request_ring_position = intel_ring_get_tail(ring);

ret = ring->add_request(ring, &seqno);
ret = ring->add_request(ring);
if (ret) {
kfree(request);
return ret;
}

trace_i915_gem_request_add(ring, seqno);

request->seqno = seqno;
request->seqno = intel_ring_get_seqno(ring);
request->ring = ring;
request->tail = request_ring_position;
request->emitted_jiffies = jiffies;
@@ -2006,6 +2030,7 @@ i915_add_request(struct intel_ring_buffer *ring,
spin_unlock(&file_priv->mm.lock);
}

trace_i915_gem_request_add(ring, request->seqno);
ring->outstanding_lazy_request = 0;

if (!dev_priv->mm.suspended) {
@@ -2022,7 +2047,7 @@ i915_add_request(struct intel_ring_buffer *ring,
}

if (out_seqno)
*out_seqno = seqno;
*out_seqno = request->seqno;
return 0;
}

@@ -2120,7 +2145,6 @@ void
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
{
uint32_t seqno;
int i;

if (list_empty(&ring->request_list))
return;
@@ -2129,10 +2153,6 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)

seqno = ring->get_seqno(ring, true);

for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
if (seqno >= ring->sync_seqno[i])
ring->sync_seqno[i] = 0;

while (!list_empty(&ring->request_list)) {
struct drm_i915_gem_request *request;

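
A note on i915_gem_handle_seqno_wrap() above: the full GPU idle is only paid
when some ring has recorded a non-zero semaphore value, after which every
sync_seqno slot is cleared so the semaphores start from scratch. A simplified
standalone model of that policy (stand-in types, not the kernel code; the
ring count here is arbitrary):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define NUM_RINGS 3

struct ring_model {
	/* Last seqno seen from each other ring via semaphores. */
	uint32_t sync_seqno[NUM_RINGS - 1];
};

static bool semaphores_in_use(const struct ring_model *rings)
{
	for (int i = 0; i < NUM_RINGS; i++)
		for (int j = 0; j < NUM_RINGS - 1; j++)
			if (rings[i].sync_seqno[j] != 0)
				return true;
	return false;
}

/* Called when next_seqno is about to wrap back past zero. */
static int handle_seqno_wrap(struct ring_model *rings)
{
	if (!semaphores_in_use(rings))
		return 0;	/* nothing recorded, skip the expensive idle */

	/* The driver idles the GPU and retires all requests here. */

	for (int i = 0; i < NUM_RINGS; i++)
		memset(rings[i].sync_seqno, 0, sizeof(rings[i].sync_seqno));
	return 0;
}

int main(void)
{
	struct ring_model rings[NUM_RINGS] = { { { 0 } } };

	rings[1].sync_seqno[0] = 0xfffffff0u;	/* pretend a semaphore was used */
	handle_seqno_wrap(rings);
	return semaphores_in_use(rings);	/* exits 0: all slots cleared */
}
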
3 changes: 1 addition & 2 deletions drivers/gpu/drm/i915/i915_gem_context.c
@@ -410,9 +410,8 @@ static int do_switch(struct i915_hw_context *to)
* MI_SET_CONTEXT instead of when the next seqno has completed.
*/
if (from_obj != NULL) {
u32 seqno = i915_gem_next_request_seqno(ring);
from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
i915_gem_object_move_to_active(from_obj, ring, seqno);
i915_gem_object_move_to_active(from_obj, ring);
/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
* whole damn pipeline, we don't need to explicitly mark the
* object dirty. The only exception is that the context must be
30 changes: 6 additions & 24 deletions drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -713,8 +713,7 @@ validate_exec_list(struct drm_i915_gem_exec_object2 *exec,

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
struct intel_ring_buffer *ring,
u32 seqno)
struct intel_ring_buffer *ring)
{
struct drm_i915_gem_object *obj;

@@ -726,10 +725,10 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
obj->base.write_domain = obj->base.pending_write_domain;
obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

i915_gem_object_move_to_active(obj, ring, seqno);
i915_gem_object_move_to_active(obj, ring);
if (obj->base.write_domain) {
obj->dirty = 1;
obj->last_write_seqno = seqno;
obj->last_write_seqno = intel_ring_get_seqno(ring);
if (obj->pin_count) /* check for potential scanout */
intel_mark_fb_busy(obj);
}
@@ -789,7 +788,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct intel_ring_buffer *ring;
u32 ctx_id = i915_execbuffer2_get_context_id(*args);
u32 exec_start, exec_len;
u32 seqno;
u32 mask;
u32 flags;
int ret, mode, i;
@@ -994,22 +992,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (ret)
goto err;

seqno = i915_gem_next_request_seqno(ring);
for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
if (seqno < ring->sync_seqno[i]) {
/* The GPU can not handle its semaphore value wrapping,
* so every billion or so execbuffers, we need to stall
* the GPU in order to reset the counters.
*/
ret = i915_gpu_idle(dev);
if (ret)
goto err;
i915_gem_retire_requests(dev);

BUG_ON(ring->sync_seqno[i]);
}
}

ret = i915_switch_context(ring, file, ctx_id);
if (ret)
goto err;
@@ -1035,8 +1017,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
}

trace_i915_gem_ring_dispatch(ring, seqno, flags);

exec_start = batch_obj->gtt_offset + args->batch_start_offset;
exec_len = args->batch_len;
if (cliprects) {
@@ -1060,7 +1040,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
}

i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);

i915_gem_execbuffer_move_to_active(&objects, ring);
i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
49 changes: 26 additions & 23 deletions drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -555,12 +555,11 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)

static void
update_mboxes(struct intel_ring_buffer *ring,
u32 seqno,
u32 mmio_offset)
u32 mmio_offset)
{
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit(ring, mmio_offset);
intel_ring_emit(ring, seqno);
intel_ring_emit(ring, ring->outstanding_lazy_request);
}

/**
@@ -573,8 +572,7 @@ update_mboxes(struct intel_ring_buffer *ring,
* This acts like a signal in the canonical semaphore.
*/
static int
gen6_add_request(struct intel_ring_buffer *ring,
u32 *seqno)
gen6_add_request(struct intel_ring_buffer *ring)
{
u32 mbox1_reg;
u32 mbox2_reg;
@@ -587,13 +585,11 @@ gen6_add_request(struct intel_ring_buffer *ring,
mbox1_reg = ring->signal_mbox[0];
mbox2_reg = ring->signal_mbox[1];

*seqno = i915_gem_next_request_seqno(ring);

update_mboxes(ring, *seqno, mbox1_reg);
update_mboxes(ring, *seqno, mbox2_reg);
update_mboxes(ring, mbox1_reg);
update_mboxes(ring, mbox2_reg);
intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
intel_ring_emit(ring, *seqno);
intel_ring_emit(ring, ring->outstanding_lazy_request);
intel_ring_emit(ring, MI_USER_INTERRUPT);
intel_ring_advance(ring);

@@ -650,10 +646,8 @@ do { \
} while (0)

static int
pc_render_add_request(struct intel_ring_buffer *ring,
u32 *result)
pc_render_add_request(struct intel_ring_buffer *ring)
{
u32 seqno = i915_gem_next_request_seqno(ring);
struct pipe_control *pc = ring->private;
u32 scratch_addr = pc->gtt_offset + 128;
int ret;
@@ -674,7 +668,7 @@ pc_render_add_request(struct intel_ring_buffer *ring,
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
intel_ring_emit(ring, seqno);
intel_ring_emit(ring, ring->outstanding_lazy_request);
intel_ring_emit(ring, 0);
PIPE_CONTROL_FLUSH(ring, scratch_addr);
scratch_addr += 128; /* write to separate cachelines */
@@ -693,11 +687,10 @@ pc_render_add_request(struct intel_ring_buffer *ring,
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_NOTIFY);
intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
intel_ring_emit(ring, seqno);
intel_ring_emit(ring, ring->outstanding_lazy_request);
intel_ring_emit(ring, 0);
intel_ring_advance(ring);

*result = seqno;
return 0;
}

@@ -885,25 +878,20 @@ bsd_ring_flush(struct intel_ring_buffer *ring,
}

static int
i9xx_add_request(struct intel_ring_buffer *ring,
u32 *result)
i9xx_add_request(struct intel_ring_buffer *ring)
{
u32 seqno;
int ret;

ret = intel_ring_begin(ring, 4);
if (ret)
return ret;

seqno = i915_gem_next_request_seqno(ring);

intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
intel_ring_emit(ring, seqno);
intel_ring_emit(ring, ring->outstanding_lazy_request);
intel_ring_emit(ring, MI_USER_INTERRUPT);
intel_ring_advance(ring);

*result = seqno;
return 0;
}

@@ -1110,6 +1098,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
ring->size = 32 * PAGE_SIZE;
memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));

init_waitqueue_head(&ring->irq_queue);

@@ -1338,6 +1327,15 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
return -EBUSY;
}

static int
intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
{
if (ring->outstanding_lazy_request)
return 0;

return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
}

int intel_ring_begin(struct intel_ring_buffer *ring,
int num_dwords)
{
@@ -1349,6 +1347,11 @@ int intel_ring_begin(struct intel_ring_buffer *ring,
if (ret)
return ret;

/* Preallocate the olr before touching the ring */
ret = intel_ring_alloc_seqno(ring);
if (ret)
return ret;

if (unlikely(ring->tail + n > ring->effective_size)) {
ret = intel_wrap_ring_buffer(ring);
if (unlikely(ret))