Skip to content

Commit

Permalink
drm/i915: Update error capture code to avoid using the current vma state
Browse files Browse the repository at this point in the history
With asynchronous migrations, the vma state may be several migrations
ahead of the state that matches the request we're capturing.
Address that by introducing an i915_vma_snapshot structure that
can be used to snapshot relevant state at request submission.
In order to make sure we access the correct memory, the snapshots take
references on relevant sg-tables and memory regions.

Also move the capture list allocation out of the fence signaling
critical path and use the CONFIG_DRM_I915_CAPTURE_ERROR define to
avoid compiling in members and functions used for error capture
when they're not used.

Finally, introduce lockdep annotation.

v4:
- Break out the capture allocation mode change to a separate patch.
v5:
- Fix compilation error in the !CONFIG_DRM_I915_CAPTURE_ERROR case
  (kernel test robot)
v6:
- Use #if IS_ENABLED() instead of #ifdef to match driver style.
- Move yet another change of allocation mode to the separate patch.
- Commit message rework due to patch reordering.
v7:
- Adjust for removal of region refcounting.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211129202245.472043-1-thomas.hellstrom@linux.intel.com
  • Loading branch information
Thomas Hellström committed Dec 1, 2021
1 parent 49a8bf5 commit ff20afc
Show file tree
Hide file tree
Showing 8 changed files with 554 additions and 95 deletions.
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ i915-y += \
i915_trace_points.o \
i915_ttm_buddy_manager.o \
i915_vma.o \
i915_vma_snapshot.o \
intel_wopcm.o

# general-purpose microcontroller (GuC) support
Expand Down
135 changes: 112 additions & 23 deletions drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "i915_gem_ioctls.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
#include "i915_vma_snapshot.h"

struct eb_vma {
struct i915_vma *vma;
Expand Down Expand Up @@ -307,11 +308,15 @@ struct i915_execbuffer {

struct eb_fence *fences;
unsigned long num_fences;
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1];
#endif
};

static int eb_parse(struct i915_execbuffer *eb);
static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
static void eb_unpin_engine(struct i915_execbuffer *eb);
static void eb_capture_release(struct i915_execbuffer *eb);

static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
Expand Down Expand Up @@ -1043,6 +1048,7 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
i915_vma_put(vma);
}

eb_capture_release(eb);
eb_unpin_engine(eb);
}

Expand Down Expand Up @@ -1880,36 +1886,113 @@ eb_find_first_request_added(struct i915_execbuffer *eb)
return NULL;
}

static int eb_move_to_gpu(struct i915_execbuffer *eb)
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)

/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
static void eb_capture_stage(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
unsigned int i = count;
int err = 0, j;
unsigned int i = count, j;
struct i915_vma_snapshot *vsnap;

while (i--) {
struct eb_vma *ev = &eb->vma[i];
struct i915_vma *vma = ev->vma;
unsigned int flags = ev->flags;
struct drm_i915_gem_object *obj = vma->obj;

assert_vma_held(vma);
if (!(flags & EXEC_OBJECT_CAPTURE))
continue;

if (flags & EXEC_OBJECT_CAPTURE) {
vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
if (!vsnap)
continue;

i915_vma_snapshot_init(vsnap, vma, "user");
for_each_batch_create_order(eb, j) {
struct i915_capture_list *capture;

for_each_batch_create_order(eb, j) {
if (!eb->requests[j])
break;
capture = kmalloc(sizeof(*capture), GFP_KERNEL);
if (!capture)
continue;

capture = kmalloc(sizeof(*capture), GFP_KERNEL);
if (capture) {
capture->next =
eb->requests[j]->capture_list;
capture->vma = vma;
eb->requests[j]->capture_list = capture;
}
}
capture->next = eb->capture_lists[j];
capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
eb->capture_lists[j] = capture;
}
i915_vma_snapshot_put(vsnap);
}
}

/*
 * Commit once we're in the critical path: every capture list staged in
 * eb->capture_lists[] is attached to the request sitting in the same slot.
 *
 * The walk stops at the first slot that has no request. Lists staged for
 * that slot, and for any slot after it, are left in eb->capture_lists[].
 */
static void eb_capture_commit(struct i915_execbuffer *eb)
{
	unsigned int idx;

	for_each_batch_create_order(eb, idx) {
		struct i915_request *request = eb->requests[idx];

		if (request == NULL)
			break;

		/* Hand the list to the request, then forget the staged pointer. */
		request->capture_list = eb->capture_lists[idx];
		eb->capture_lists[idx] = NULL;
	}
}

/*
 * Free whatever is still staged in eb->capture_lists[], i.e. lists that
 * were never committed to a request because of an error. Committed lists
 * are not touched here; they are freed at request retire instead.
 *
 * Each freed slot is reset to NULL.
 */
static void eb_capture_release(struct i915_execbuffer *eb)
{
	unsigned int idx;

	for_each_batch_create_order(eb, idx) {
		struct i915_capture_list *staged = eb->capture_lists[idx];

		/* Nothing staged (or already committed) for this slot. */
		if (!staged)
			continue;

		i915_request_free_capture_list(staged);
		eb->capture_lists[idx] = NULL;
	}
}

/*
 * Reset every slot of eb->capture_lists[] so that no capture list is
 * considered staged.
 */
static void eb_capture_list_clear(struct i915_execbuffer *eb)
{
	unsigned int slot;

	for (slot = 0; slot < ARRAY_SIZE(eb->capture_lists); slot++)
		eb->capture_lists[slot] = NULL;
}

#else

/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: there are no
 * capture lists to stage, so this does nothing.
 */
static void eb_capture_stage(struct i915_execbuffer *eb) { }

/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: nothing was
 * staged, so there is nothing to commit.
 */
static void eb_capture_commit(struct i915_execbuffer *eb) { }

/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: nothing was
 * staged, so there is nothing to release.
 */
static void eb_capture_release(struct i915_execbuffer *eb) { }

/*
 * Stub for builds without CONFIG_DRM_I915_CAPTURE_ERROR: the execbuffer
 * has no capture_lists member in this configuration, so nothing to clear.
 */
static void eb_capture_list_clear(struct i915_execbuffer *eb) { }

#endif

static int eb_move_to_gpu(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
unsigned int i = count;
int err = 0, j;

while (i--) {
struct eb_vma *ev = &eb->vma[i];
struct i915_vma *vma = ev->vma;
unsigned int flags = ev->flags;
struct drm_i915_gem_object *obj = vma->obj;

assert_vma_held(vma);

/*
* If the GPU is not _reading_ through the CPU cache, we need
Expand Down Expand Up @@ -1990,6 +2073,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)

/* Unconditionally flush any chipset caches (for streaming writes). */
intel_gt_chipset_flush(eb->gt);
eb_capture_commit(eb);

return 0;

err_skip:
Expand Down Expand Up @@ -3132,13 +3217,14 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
}

/*
* Whilst this request exists, batch_obj will be on the
* active_list, and so will hold the active reference. Only when
* this request is retired will the batch_obj be moved onto
* the inactive_list and lose its active reference. Hence we do
* not need to explicitly hold another reference here.
* Not really on stack, but we don't want to call
* kfree on the batch_snapshot when we put it, so use the
* _onstack interface.
*/
eb->requests[i]->batch = eb->batches[i]->vma;
if (eb->batches[i]->vma)
i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
eb->batches[i]->vma,
"batch");
if (eb->batch_pool) {
GEM_BUG_ON(intel_context_is_parallel(eb->context));
intel_gt_buffer_pool_mark_active(eb->batch_pool,
Expand Down Expand Up @@ -3187,6 +3273,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.fences = NULL;
eb.num_fences = 0;

eb_capture_list_clear(&eb);

memset(eb.requests, 0, sizeof(struct i915_request *) *
ARRAY_SIZE(eb.requests));
eb.composite_fence = NULL;
Expand Down Expand Up @@ -3273,6 +3361,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}

ww_acquire_done(&eb.ww.ctx);
eb_capture_stage(&eb);

out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
if (IS_ERR(out_fence)) {
Expand Down
8 changes: 6 additions & 2 deletions drivers/gpu/drm/i915/gt/intel_engine_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1676,14 +1676,18 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,

static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
{
struct i915_vma_snapshot *vsnap = &rq->batch_snapshot;
void *ring;
int size;

if (!i915_vma_snapshot_present(vsnap))
vsnap = NULL;

drm_printf(m,
"[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
rq->head, rq->postfix, rq->tail,
rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u,
vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u);

size = rq->tail - rq->head;
if (rq->tail < rq->head)
Expand Down
Loading

0 comments on commit ff20afc

Please sign in to comment.