drm/i915/gem: Use chained reloc batches
The ring is a precious resource: we anticipate using only a few hundred
bytes for a request, and only try to reserve that much before we start.
If we exceed our guess while building the request, then instead of
waiting at the start of execbuf, before we hold any locks or other
resources, we may trigger the wait inside a critical region. One
example is the use of GPU relocations: currently we emit a new
MI_BB_START from the ring every time we overflow a page of relocation
entries. Instead of inserting that command into the precious ring, we
can chain the next page of relocation entries with an MI_BB_START from
the end of the previous page.

v2: Delay the emit_bb_start until after all the chained vma
synchronisation is complete. Since the buffer pool batches are idle,
this _should_ be a no-op, but one day we may have some fancy async GPU
bindings for new vma!

v3: Use pool/batch consistently; once we start thinking in terms of
the batch vma, use batch->obj.

v4: Explain the magic number 4.

Tvrtko spotted that we lose the propagation of the error when we fail
to submit the relocation request; that is easier to fix up in the next
patch.

Testcase: igt/gem_exec_reloc/basic-many-active
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200501192945.22215-1-chris@chris-wilson.co.uk
Chris Wilson committed May 1, 2020
1 parent 9f909e2 commit 964a9b0
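For reference, the tail reserve that makes the chaining work, i.e. the magic number 4 from v4, is four dwords per page: one dword of MI_ARB_CHECK, then an MI_BATCH_BUFFER_START plus up to two address dwords (gen8+ uses both; older gens emit only the low dword and stay within the same reserve). Below is a minimal userspace sketch of that tail, with placeholder opcode values rather than the real MI_* encodings:

#include <stdint.h>
#include <stdio.h>

#define RELOC_TAIL 4                  /* dwords reserved at the end of each page */
#define MI_ARB_CHECK_STUB     0x1u    /* placeholder, not the real encoding */
#define MI_BB_START_GEN8_STUB 0x2u    /* placeholder, not the real encoding */

/* Write the gen8+ chain tail: a preemption point, then a jump into the
 * next page of relocation entries. Consumes exactly RELOC_TAIL dwords. */
static uint32_t *emit_chain_tail(uint32_t *cmd, uint64_t next_page)
{
        *cmd++ = MI_ARB_CHECK_STUB;
        *cmd++ = MI_BB_START_GEN8_STUB;
        *cmd++ = (uint32_t)next_page;           /* lower_32_bits() */
        *cmd++ = (uint32_t)(next_page >> 32);   /* upper_32_bits() */
        return cmd;
}

int main(void)
{
        uint32_t tail[RELOC_TAIL];

        emit_chain_tail(tail, 0x100001000ull);
        printf("%08x %08x %08x %08x\n", tail[0], tail[1], tail[2], tail[3]);
        return 0;
}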
Showing 1 changed file with 115 additions and 19 deletions: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -269,6 +269,7 @@ struct i915_execbuffer {
 		bool needs_unfenced : 1;
 
 		struct i915_request *rq;
+		struct i915_vma *rq_vma;
 		u32 *rq_cmd;
 		unsigned int rq_size;
 	} reloc_cache;
@@ -975,20 +976,114 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 	return &i915->ggtt;
 }
 
+#define RELOC_TAIL 4
+
+static int reloc_gpu_chain(struct reloc_cache *cache)
+{
+	struct intel_gt_buffer_pool_node *pool;
+	struct i915_request *rq = cache->rq;
+	struct i915_vma *batch;
+	u32 *cmd;
+	int err;
+
+	pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE);
+	if (IS_ERR(pool))
+		return PTR_ERR(pool);
+
+	batch = i915_vma_instance(pool->obj, rq->context->vm, NULL);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto out_pool;
+	}
+
+	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+	if (err)
+		goto out_pool;
+
+	GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32));
+	cmd = cache->rq_cmd + cache->rq_size;
+	*cmd++ = MI_ARB_CHECK;
+	if (cache->gen >= 8) {
+		*cmd++ = MI_BATCH_BUFFER_START_GEN8;
+		*cmd++ = lower_32_bits(batch->node.start);
+		*cmd++ = upper_32_bits(batch->node.start);
+	} else {
+		*cmd++ = MI_BATCH_BUFFER_START;
+		*cmd++ = lower_32_bits(batch->node.start);
+	}
+	i915_gem_object_flush_map(cache->rq_vma->obj);
+	i915_gem_object_unpin_map(cache->rq_vma->obj);
+	cache->rq_vma = NULL;
+
+	err = intel_gt_buffer_pool_mark_active(pool, rq);
+	if (err == 0) {
+		i915_vma_lock(batch);
+		err = i915_request_await_object(rq, batch->obj, false);
+		if (err == 0)
+			err = i915_vma_move_to_active(batch, rq, 0);
+		i915_vma_unlock(batch);
+	}
+	i915_vma_unpin(batch);
+	if (err)
+		goto out_pool;
+
+	cmd = i915_gem_object_pin_map(batch->obj,
+				      cache->has_llc ?
+				      I915_MAP_FORCE_WB :
+				      I915_MAP_FORCE_WC);
+	if (IS_ERR(cmd)) {
+		err = PTR_ERR(cmd);
+		goto out_pool;
+	}
+
+	/* Return with batch mapping (cmd) still pinned */
+	cache->rq_cmd = cmd;
+	cache->rq_size = 0;
+	cache->rq_vma = batch;
+
+out_pool:
+	intel_gt_buffer_pool_put(pool);
+	return err;
+}
+
+static unsigned int reloc_bb_flags(const struct reloc_cache *cache)
+{
+	return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE;
+}
+
 static void reloc_gpu_flush(struct reloc_cache *cache)
 {
-	struct drm_i915_gem_object *obj = cache->rq->batch->obj;
+	struct i915_request *rq;
+	int err;
 
-	GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
-	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
+	rq = fetch_and_zero(&cache->rq);
+	if (!rq)
+		return;
 
-	__i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
-	i915_gem_object_unpin_map(obj);
+	if (cache->rq_vma) {
+		struct drm_i915_gem_object *obj = cache->rq_vma->obj;
 
-	intel_gt_chipset_flush(cache->rq->engine->gt);
+		GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
+		cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END;
 
-	i915_request_add(cache->rq);
-	cache->rq = NULL;
+		__i915_gem_object_flush_map(obj,
+					    0, sizeof(u32) * cache->rq_size);
+		i915_gem_object_unpin_map(obj);
+	}
+
+	err = 0;
+	if (rq->engine->emit_init_breadcrumb)
+		err = rq->engine->emit_init_breadcrumb(rq);
+	if (!err)
+		err = rq->engine->emit_bb_start(rq,
+						rq->batch->node.start,
+						PAGE_SIZE,
+						reloc_bb_flags(cache));
+	if (err)
+		i915_request_set_error_once(rq, err);
+
+	intel_gt_chipset_flush(rq->engine->gt);
+	i915_request_add(rq);
 }
 
 static void reloc_cache_reset(struct reloc_cache *cache)
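The hunk above also implements the v2 note: emit_bb_start is no longer issued when the relocation request is created, but once, at flush time, after every chained batch has been synchronised against the request. A rough sketch of the resulting lifecycle (annotation only, not part of the patch):

/*
 * __reloc_gpu_alloc()  - allocate the request and pin the first page;
 *                        nothing is emitted into the ring yet
 * reloc_gpu() * N      - append relocation entries; on page overflow,
 *                        reloc_gpu_chain() writes MI_ARB_CHECK +
 *                        MI_BATCH_BUFFER_START into the reserved tail,
 *                        jumping to a fresh page from the buffer pool
 * reloc_gpu_flush()    - terminate the last page with MI_BATCH_BUFFER_END,
 *                        emit a single MI_BB_START from the ring to the
 *                        first page, and submit; any emission failure is
 *                        recorded with i915_request_set_error_once()
 */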
@@ -1237,12 +1332,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_request;
 
-	err = eb->engine->emit_bb_start(rq,
-					batch->node.start, PAGE_SIZE,
-					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
-	if (err)
-		goto skip_request;
-
 	i915_vma_lock(batch);
 	err = i915_request_await_object(rq, batch->obj, false);
 	if (err == 0)
@@ -1257,6 +1346,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	cache->rq = rq;
 	cache->rq_cmd = cmd;
 	cache->rq_size = 0;
+	cache->rq_vma = batch;
 
 	/* Return with batch mapping (cmd) still pinned */
 	goto out_pool;
@@ -1280,13 +1370,9 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 {
 	struct reloc_cache *cache = &eb->reloc_cache;
 	u32 *cmd;
-
-	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
-		reloc_gpu_flush(cache);
+	int err;
 
 	if (unlikely(!cache->rq)) {
-		int err;
-
 		if (!intel_engine_can_store_dword(eb->engine))
 			return ERR_PTR(-ENODEV);
 
@@ -1295,6 +1381,16 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 			return ERR_PTR(err);
 	}
 
+	if (unlikely(cache->rq_size + len >
+		     PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) {
+		err = reloc_gpu_chain(cache);
+		if (unlikely(err)) {
+			i915_request_set_error_once(cache->rq, err);
+			return ERR_PTR(err);
+		}
+	}
+
+	GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32));
 	cmd = cache->rq_cmd + cache->rq_size;
 	cache->rq_size += len;
 
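The new overflow check keeps the incoming len clear of the reserved tail, so the chain jump always fits. A tiny standalone model of that arithmetic, assuming 4KiB pages (names here are hypothetical, for illustration only):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_DWORDS (4096 / 4)  /* PAGE_SIZE / sizeof(u32) == 1024 */
#define RELOC_TAIL  4           /* tail reserved for the chained MI_BB_START */

/* Mirrors: cache->rq_size + len > PAGE_SIZE / sizeof(u32) - RELOC_TAIL */
static bool needs_chain(unsigned int rq_size, unsigned int len)
{
        return rq_size + len > PAGE_DWORDS - RELOC_TAIL;
}

int main(void)
{
        printf("%d\n", needs_chain(1016, 4));   /* 1020 dwords fit exactly -> 0 */
        printf("%d\n", needs_chain(1017, 4));   /* would spill into the tail -> 1 */
        return 0;
}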
