Skip to content

Commit

Permalink
drm/i915/selftests: reset engine self tests
Browse files Browse the repository at this point in the history
Check that we can reset specific engines. Also check that we fall back
to a full GPU reset if the engine reset fails.

v2: rebase.
v3: use RESET_ENGINE_IN_PROGRESS flag.
v4: use I915_RESET_ENGINE flag.

Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170615201828.23144-12-michel.thierry@intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170620095751.13127-9-chris@chris-wilson.co.uk
  • Loading branch information
Michel Thierry authored and Chris Wilson committed Jun 20, 2017
1 parent 061d06a commit abeb4de
Showing 1 changed file with 148 additions and 0 deletions.
148 changes: 148 additions & 0 deletions drivers/gpu/drm/i915/selftests/intel_hangcheck.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,54 @@ static int igt_global_reset(void *arg)
return err;
}

/*
 * igt_reset_engine - live selftest exercising per-engine reset.
 * @arg: the struct drm_i915_private under test.
 *
 * For each engine, mark the engine reset as in progress, call
 * i915_reset_engine() and verify through the reset counters that an engine
 * reset (and not a full GPU reset) was recorded.
 *
 * Returns 0 on success or when engine reset is not supported, the error
 * returned by i915_reset_engine(), -EINVAL if the counters do not match the
 * expectation, or -EIO if the device ends up terminally wedged.
 */
static int igt_reset_engine(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int reset_count, reset_engine_count;
	int err = 0;

	/* Check that we can reset a single engine without a full GPU reset */

	if (!intel_has_reset_engine(i915))
		return 0;

	for_each_engine(engine, i915, id) {
		set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags);

		/* Sample both counters so we can tell which reset happened */
		reset_count = i915_reset_count(&i915->gpu_error);
		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
							     engine);

		err = i915_reset_engine(engine);
		if (err) {
			pr_err("i915_reset_engine failed\n");
		} else if (i915_reset_count(&i915->gpu_error) != reset_count) {
			pr_err("Full GPU reset recorded! (engine reset expected)\n");
			err = -EINVAL;
		} else if (i915_reset_engine_count(&i915->gpu_error, engine) ==
			   reset_engine_count) {
			pr_err("No %s engine reset recorded!\n", engine->name);
			err = -EINVAL;
		}

		/*
		 * Always drop the per-engine reset bit, including on failure,
		 * so that it is not left set in gpu_error.flags once this
		 * test has returned.
		 */
		clear_bit(I915_RESET_ENGINE + engine->id,
			  &i915->gpu_error.flags);

		if (err)
			break;
	}

	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
u32 reset_count;
Expand Down Expand Up @@ -530,13 +578,113 @@ static int igt_reset_queue(void *arg)
return err;
}

/*
 * igt_render_engine_reset_fallback - live selftest for the full-reset
 * fallback taken when a render engine reset cannot be used.
 * @arg: the struct drm_i915_private under test.
 *
 * Submits a hanging request on the render engine, marks its fence with -EIO
 * so that the engine reset is made to fail, then calls i915_handle_error()
 * and checks via the reset counters that a full GPU reset, and no engine
 * reset, was recorded. If the device is left wedged, one more full reset is
 * issued to re-enable the hardware.
 *
 * Returns 0 on success or when engine reset is not supported, the error from
 * hang_init()/hang_create_request(), -EINVAL if the counters do not match the
 * expectation, or -EIO if the hang never started or the device is still
 * terminally wedged on exit.
 */
static int igt_render_engine_reset_fallback(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine = i915->engine[RCS];
	struct hang h;
	struct drm_i915_gem_request *rq;
	unsigned int reset_count, reset_engine_count;
	int err = 0;

	/* Check that a failed engine reset falls back to a full GPU reset */

	if (!intel_has_reset_engine(i915))
		return 0;

	set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	mutex_lock(&i915->drm.struct_mutex);

	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, engine, i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	/* Hold our own reference; it is dropped at out_rq */
	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	/* make reset engine fail */
	rq->fence.error = -EIO;

	if (!wait_for_hang(&h, rq)) {
		pr_err("Failed to start request %x\n", rq->fence.seqno);
		err = -EIO;
		goto out_rq;
	}

	reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine);
	reset_count = fake_hangcheck(rq);

	/* unlock since we'll call handle_error */
	mutex_unlock(&i915->drm.struct_mutex);
	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	wake_up_all(&i915->gpu_error.reset_queue);

	i915_handle_error(i915, intel_engine_flag(engine), "live test");

	/*
	 * Retake what we dropped above before doing anything else: every
	 * path from here on ends in the common cleanup below, which calls
	 * hang_fini() and unlocks struct_mutex, so the mutex must be held
	 * whether or not the checks pass.
	 */
	set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	mutex_lock(&i915->drm.struct_mutex);

	if (i915_reset_engine_count(&i915->gpu_error, engine) !=
	    reset_engine_count) {
		pr_err("render engine reset recorded! (full reset expected)\n");
		err = -EINVAL;
		goto out_rq;
	}

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No full GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

	/*
	 * by using fence.error = -EIO, full reset sets the wedged flag, do one
	 * more full reset to re-enable the hw.
	 */
	if (i915_terminally_wedged(&i915->gpu_error)) {
		rq->fence.error = 0;

		set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
		i915_reset(i915);
		GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
				    &i915->gpu_error.flags));

		/*
		 * NOTE(review): reset_count still holds the value sampled
		 * before the first reset, so this compares against the
		 * pre-test count rather than the count just before this
		 * second reset - confirm that is the intent.
		 */
		if (i915_reset_count(&i915->gpu_error) == reset_count) {
			pr_err("No full GPU reset recorded!\n");
			err = -EINVAL;
			goto out_rq;
		}
	}

out_rq:
	i915_gem_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	wake_up_all(&i915->gpu_error.reset_queue);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(igt_hang_sanitycheck),
SUBTEST(igt_global_reset),
SUBTEST(igt_reset_engine),
SUBTEST(igt_wait_reset),
SUBTEST(igt_reset_queue),
SUBTEST(igt_render_engine_reset_fallback),
};

if (!intel_has_gpu_reset(i915))
Expand Down

0 comments on commit abeb4de

Please sign in to comment.