Skip to content

Commit

Permalink
drm/i915: Lock the engine while dumping the active request
Browse files Browse the repository at this point in the history
We cannot let the request be retired and freed while we are trying to
dump it during error capture. It is not sufficient just to grab a
reference to the request, as during retirement we may free the ring
which we are also dumping. So take the engine lock to prevent retiring
and freeing of the request.

Reported-by: Alex Shumsky <alexthreed@gmail.com>
Fixes: 83c3178 ("drm/i915: Dump the ringbuffer of the active request for debugging")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Alex Shumsky <alexthreed@gmail.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190715080946.15593-6-chris@chris-wilson.co.uk
  • Loading branch information
Chris Wilson committed Jul 16, 2019
1 parent bb80c92 commit cfe7288
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 9 deletions.
11 changes: 4 additions & 7 deletions drivers/gpu/drm/i915/gt/intel_engine_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1471,6 +1471,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct i915_gpu_error * const error = &engine->i915->gpu_error;
struct i915_request *rq;
intel_wakeref_t wakeref;
unsigned long flags;

if (header) {
va_list ap;
Expand All @@ -1490,10 +1491,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
i915_reset_engine_count(error, engine),
i915_reset_count(error));

rcu_read_lock();

drm_printf(m, "\tRequests:\n");

spin_lock_irqsave(&engine->active.lock, flags);
rq = intel_engine_find_active_request(engine);
if (rq) {
print_request(m, rq, "\t\tactive ");
Expand All @@ -1513,8 +1513,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,

print_request_ring(m, rq);
}

rcu_read_unlock();
spin_unlock_irqrestore(&engine->active.lock, flags);

wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
if (wakeref) {
Expand Down Expand Up @@ -1676,7 +1675,6 @@ struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine)
{
struct i915_request *request, *active = NULL;
unsigned long flags;

/*
* We are called by the error capture, reset and to dump engine
Expand All @@ -1689,7 +1687,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
* At all other times, we must assume the GPU is still running, but
* we only care about the snapshot of this moment.
*/
spin_lock_irqsave(&engine->active.lock, flags);
lockdep_assert_held(&engine->active.lock);
list_for_each_entry(request, &engine->active.requests, sched.link) {
if (i915_request_completed(request))
continue;
Expand All @@ -1704,7 +1702,6 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
active = request;
break;
}
spin_unlock_irqrestore(&engine->active.lock, flags);

return active;
}
Expand Down
6 changes: 4 additions & 2 deletions drivers/gpu/drm/i915/i915_gpu_error.c
Original file line number Diff line number Diff line change
Expand Up @@ -1411,6 +1411,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
struct intel_engine_cs *engine = i915->engine[i];
struct drm_i915_error_engine *ee = &error->engine[i];
struct i915_request *request;
unsigned long flags;

ee->engine_id = -1;

Expand All @@ -1422,10 +1423,11 @@ static void gem_record_rings(struct i915_gpu_state *error)
error_record_engine_registers(error, engine, ee);
error_record_engine_execlists(engine, ee);

spin_lock_irqsave(&engine->active.lock, flags);
request = intel_engine_find_active_request(engine);
if (request) {
struct i915_gem_context *ctx = request->gem_context;
struct intel_ring *ring;
struct intel_ring *ring = request->ring;

ee->vm = ctx->vm ?: &engine->gt->ggtt->vm;

Expand Down Expand Up @@ -1455,14 +1457,14 @@ static void gem_record_rings(struct i915_gpu_state *error)
ee->rq_post = request->postfix;
ee->rq_tail = request->tail;

ring = request->ring;
ee->cpu_ring_head = ring->head;
ee->cpu_ring_tail = ring->tail;
ee->ringbuffer =
i915_error_object_create(i915, ring->vma);

engine_record_requests(engine, request, ee);
}
spin_unlock_irqrestore(&engine->active.lock, flags);

ee->hws_page =
i915_error_object_create(i915,
Expand Down

0 comments on commit cfe7288

Please sign in to comment.