Skip to content

Commit

Permalink
drm/i915: Track gt pm wakerefs
Browse files Browse the repository at this point in the history
Track every intel_gt_pm_get() until its corresponding release in
intel_gt_pm_put() by returning a cookie to the caller for acquire that
must be passed by on released. When there is an imbalance, we can see who
either tried to free a stale wakeref, or who forgot to free theirs.

v2: track recently added calls in gen8_ggtt_bind_get_ce and
    destroyed_worker_func

Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231030-ref_tracker_i915-v1-2-006fe6b96421@intel.com
  • Loading branch information
Andrzej Hajda committed Nov 20, 2023
1 parent b49e894 commit 5e4e06e
Show file tree
Hide file tree
Showing 24 changed files with 197 additions and 90 deletions.
14 changes: 14 additions & 0 deletions drivers/gpu/drm/i915/Kconfig.debug
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ config DRM_I915_DEBUG
select DRM_I915_DEBUG_GEM_ONCE
select DRM_I915_DEBUG_MMIO
select DRM_I915_DEBUG_RUNTIME_PM
select DRM_I915_DEBUG_WAKEREF
select DRM_I915_SW_FENCE_DEBUG_OBJECTS
select DRM_I915_SELFTEST
default n
Expand Down Expand Up @@ -244,3 +245,16 @@ config DRM_I915_DEBUG_RUNTIME_PM
Recommended for driver developers only.

If in doubt, say "N"

config DRM_I915_DEBUG_WAKEREF
bool "Enable extra tracking for wakerefs"
depends on DRM_I915
select REF_TRACKER
select STACKDEPOT
select STACKTRACE
help
Choose this option to turn on extra state checking and usage
tracking for the wakerefPM functionality. This may introduce
overhead during driver runtime.

If in doubt, say "N"
14 changes: 8 additions & 6 deletions drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ struct i915_execbuffer {
struct intel_gt *gt; /* gt for the execbuf */
struct intel_context *context; /* logical state for the request */
struct i915_gem_context *gem_context; /** caller's context */
intel_wakeref_t wakeref;
intel_wakeref_t wakeref_gt0;

/** our requests to build */
struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
Expand Down Expand Up @@ -2719,13 +2721,13 @@ eb_select_engine(struct i915_execbuffer *eb)

for_each_child(ce, child)
intel_context_get(child);
intel_gt_pm_get(gt);
eb->wakeref = intel_gt_pm_get(ce->engine->gt);
/*
* Keep GT0 active on MTL so that i915_vma_parked() doesn't
* free VMAs while execbuf ioctl is validating VMAs.
*/
if (gt->info.id)
intel_gt_pm_get(to_gt(gt->i915));
eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915));

if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
err = intel_context_alloc_state(ce);
Expand Down Expand Up @@ -2765,9 +2767,9 @@ eb_select_engine(struct i915_execbuffer *eb)

err:
if (gt->info.id)
intel_gt_pm_put(to_gt(gt->i915));
intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0);

intel_gt_pm_put(gt);
intel_gt_pm_put(ce->engine->gt, eb->wakeref);
for_each_child(ce, child)
intel_context_put(child);
intel_context_put(ce);
Expand All @@ -2785,8 +2787,8 @@ eb_put_engine(struct i915_execbuffer *eb)
* i915_vma_parked() from interfering while execbuf validates vmas.
*/
if (eb->gt->info.id)
intel_gt_pm_put(to_gt(eb->gt->i915));
intel_gt_pm_put(eb->gt);
intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0);
intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
for_each_child(eb->context, child)
intel_context_put(child);
intel_context_put(eb->context);
Expand Down
10 changes: 6 additions & 4 deletions drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)

static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
{
intel_wakeref_t wakeref;
struct i915_vma *vma;
u32 __iomem *map;
int err = 0;
Expand All @@ -99,7 +100,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
if (IS_ERR(vma))
return PTR_ERR(vma);

intel_gt_pm_get(vma->vm->gt);
wakeref = intel_gt_pm_get(vma->vm->gt);

map = i915_vma_pin_iomap(vma);
i915_vma_unpin(vma);
Expand All @@ -112,12 +113,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
i915_vma_unpin_iomap(vma);

out_rpm:
intel_gt_pm_put(vma->vm->gt);
intel_gt_pm_put(vma->vm->gt, wakeref);
return err;
}

static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
{
intel_wakeref_t wakeref;
struct i915_vma *vma;
u32 __iomem *map;
int err = 0;
Expand All @@ -132,7 +134,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
if (IS_ERR(vma))
return PTR_ERR(vma);

intel_gt_pm_get(vma->vm->gt);
wakeref = intel_gt_pm_get(vma->vm->gt);

map = i915_vma_pin_iomap(vma);
i915_vma_unpin(vma);
Expand All @@ -145,7 +147,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
i915_vma_unpin_iomap(vma);

out_rpm:
intel_gt_pm_put(vma->vm->gt);
intel_gt_pm_put(vma->vm->gt, wakeref);
return err;
}

Expand Down
14 changes: 8 additions & 6 deletions drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
Original file line number Diff line number Diff line change
Expand Up @@ -630,14 +630,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
static void disable_retire_worker(struct drm_i915_private *i915)
{
i915_gem_driver_unregister__shrinker(i915);
intel_gt_pm_get(to_gt(i915));
intel_gt_pm_get_untracked(to_gt(i915));
cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work);
}

static void restore_retire_worker(struct drm_i915_private *i915)
{
igt_flush_test(i915);
intel_gt_pm_put(to_gt(i915));
intel_gt_pm_put_untracked(to_gt(i915));
i915_gem_driver_register__shrinker(i915);
}

Expand Down Expand Up @@ -778,6 +778,7 @@ static int igt_mmap_offset_exhaustion(void *arg)

static int gtt_set(struct drm_i915_gem_object *obj)
{
intel_wakeref_t wakeref;
struct i915_vma *vma;
void __iomem *map;
int err = 0;
Expand All @@ -786,7 +787,7 @@ static int gtt_set(struct drm_i915_gem_object *obj)
if (IS_ERR(vma))
return PTR_ERR(vma);

intel_gt_pm_get(vma->vm->gt);
wakeref = intel_gt_pm_get(vma->vm->gt);
map = i915_vma_pin_iomap(vma);
i915_vma_unpin(vma);
if (IS_ERR(map)) {
Expand All @@ -798,12 +799,13 @@ static int gtt_set(struct drm_i915_gem_object *obj)
i915_vma_unpin_iomap(vma);

out:
intel_gt_pm_put(vma->vm->gt);
intel_gt_pm_put(vma->vm->gt, wakeref);
return err;
}

static int gtt_check(struct drm_i915_gem_object *obj)
{
intel_wakeref_t wakeref;
struct i915_vma *vma;
void __iomem *map;
int err = 0;
Expand All @@ -812,7 +814,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
if (IS_ERR(vma))
return PTR_ERR(vma);

intel_gt_pm_get(vma->vm->gt);
wakeref = intel_gt_pm_get(vma->vm->gt);
map = i915_vma_pin_iomap(vma);
i915_vma_unpin(vma);
if (IS_ERR(map)) {
Expand All @@ -828,7 +830,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
i915_vma_unpin_iomap(vma);

out:
intel_gt_pm_put(vma->vm->gt);
intel_gt_pm_put(vma->vm->gt, wakeref);
return err;
}

Expand Down
13 changes: 9 additions & 4 deletions drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,14 @@ static void irq_disable(struct intel_breadcrumbs *b)

static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
intel_wakeref_t wakeref;

/*
* Since we are waiting on a request, the GPU should be busy
* and should have its own rpm reference.
*/
if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
if (GEM_WARN_ON(!wakeref))
return;

/*
Expand All @@ -41,7 +44,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
* which we can add a new waiter and avoid the cost of re-enabling
* the irq.
*/
WRITE_ONCE(b->irq_armed, true);
WRITE_ONCE(b->irq_armed, wakeref);

/* Requests may have completed before we could enable the interrupt. */
if (!b->irq_enabled++ && b->irq_enable(b))
Expand All @@ -61,12 +64,14 @@ static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)

static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
intel_wakeref_t wakeref = b->irq_armed;

GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
b->irq_disable(b);

WRITE_ONCE(b->irq_armed, false);
intel_gt_pm_put_async(b->irq_engine->gt);
WRITE_ONCE(b->irq_armed, 0);
intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
Expand Down
3 changes: 2 additions & 1 deletion drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <linux/types.h>

#include "intel_engine_types.h"
#include "intel_wakeref.h"

/*
* Rather than have every client wait upon all user interrupts,
Expand Down Expand Up @@ -43,7 +44,7 @@ struct intel_breadcrumbs {
spinlock_t irq_lock; /* protects the interrupt from hardirq context */
struct irq_work irq_work; /* for use from inside irq_lock */
unsigned int irq_enabled;
bool irq_armed;
intel_wakeref_t irq_armed;

/* Not all breadcrumbs are attached to physical HW */
intel_engine_mask_t engine_mask;
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/i915/gt/intel_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ static inline void intel_context_enter(struct intel_context *ce)
return;

ce->ops->enter(ce);
intel_gt_pm_get(ce->vm->gt);
ce->wakeref = intel_gt_pm_get(ce->vm->gt);
}

static inline void intel_context_mark_active(struct intel_context *ce)
Expand All @@ -229,7 +229,7 @@ static inline void intel_context_exit(struct intel_context *ce)
if (--ce->active_count)
return;

intel_gt_pm_put_async(ce->vm->gt);
intel_gt_pm_put_async(ce->vm->gt, ce->wakeref);
ce->ops->exit(ce);
}

Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_context_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "i915_utils.h"
#include "intel_engine_types.h"
#include "intel_sseu.h"
#include "intel_wakeref.h"

#include "uc/intel_guc_fwif.h"

Expand Down Expand Up @@ -112,6 +113,7 @@ struct intel_context {
u32 ring_size;
struct intel_ring *ring;
struct intel_timeline *timeline;
intel_wakeref_t wakeref;

unsigned long flags;
#define CONTEXT_BARRIER_BIT 0
Expand Down
7 changes: 4 additions & 3 deletions drivers/gpu/drm/i915/gt/intel_engine_pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ static int __engine_unpark(struct intel_wakeref *wf)

ENGINE_TRACE(engine, "\n");

intel_gt_pm_get(engine->gt);
engine->wakeref_track = intel_gt_pm_get(engine->gt);

/* Discard stale context state from across idling */
ce = engine->kernel_context;
Expand Down Expand Up @@ -122,6 +122,7 @@ __queue_and_release_pm(struct i915_request *rq,
*/
GEM_BUG_ON(rq->context->active_count != 1);
__intel_gt_pm_get(engine->gt);
rq->context->wakeref = intel_wakeref_track(&engine->gt->wakeref);

/*
* We have to serialise all potential retirement paths with our
Expand Down Expand Up @@ -285,7 +286,7 @@ static int __engine_park(struct intel_wakeref *wf)
engine->park(engine);

/* While gt calls i915_vma_parked(), we have to break the lock cycle */
intel_gt_pm_put_async(engine->gt);
intel_gt_pm_put_async(engine->gt, engine->wakeref_track);
return 0;
}

Expand All @@ -296,7 +297,7 @@ static const struct intel_wakeref_ops wf_ops = {

void intel_engine_init__pm(struct intel_engine_cs *engine)
{
intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops);
intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops, engine->name);
intel_engine_init_heartbeat(engine);

intel_gsc_idle_msg_enable(engine);
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_engine_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,9 @@ struct intel_engine_cs {
unsigned long serial;

unsigned long wakeref_serial;
intel_wakeref_t wakeref_track;
struct intel_wakeref wakeref;

struct file *default_state;

struct {
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gt/intel_execlists_submission.c
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ static void __execlists_schedule_out(struct i915_request * const rq,
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
if (engine->fw_domain && !--engine->fw_active)
intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
intel_gt_pm_put_async(engine->gt);
intel_gt_pm_put_async_untracked(engine->gt);

/*
* If this is part of a virtual engine, its next request may
Expand Down
16 changes: 9 additions & 7 deletions drivers/gpu/drm/i915/gt/intel_ggtt.c
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
return intel_gt_is_bind_context_ready(gt);
}

static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref)
{
struct intel_context *ce;
struct intel_gt *gt = ggtt->vm.gt;
Expand All @@ -313,18 +313,19 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
* would conflict with fs_reclaim trying to allocate memory while
* doing rpm_resume().
*/
if (!intel_gt_pm_get_if_awake(gt))
*wakeref = intel_gt_pm_get_if_awake(gt);
if (!*wakeref)
return NULL;

intel_engine_pm_get(ce->engine);

return ce;
}

static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref)
{
intel_engine_pm_put(ce->engine);
intel_gt_pm_put(ce->engine->gt);
intel_gt_pm_put(ce->engine->gt, wakeref);
}

static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
Expand All @@ -337,12 +338,13 @@ static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
struct sgt_iter iter;
struct i915_request *rq;
struct intel_context *ce;
intel_wakeref_t wakeref;
u32 *cs;

if (!num_entries)
return true;

ce = gen8_ggtt_bind_get_ce(ggtt);
ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref);
if (!ce)
return false;

Expand Down Expand Up @@ -418,13 +420,13 @@ static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
offset += n_ptes;
}

gen8_ggtt_bind_put_ce(ce);
gen8_ggtt_bind_put_ce(ce, wakeref);
return true;

err_rq:
i915_request_put(rq);
put_ce:
gen8_ggtt_bind_put_ce(ce);
gen8_ggtt_bind_put_ce(ce, wakeref);
return false;
}

Expand Down
Loading

0 comments on commit 5e4e06e

Please sign in to comment.