Skip to content

Commit

Permalink
drm/i915: Keep contexts pinned until after the next kernel context switch

Browse files Browse the repository at this point in the history

We need to keep the context image pinned in memory until after the GPU
has finished writing into it. Since it continues to write as we signal
the final breadcrumb, we need to keep it pinned until the request after
it is complete. Currently we know the order in which requests execute on
each engine, and so to remove that presumption we need to identify a
request/context-switch we know must occur after our completion. Any
request queued after the signal must imply a context switch, for
simplicity we use a fresh request from the kernel context.

The sequence of operations for keeping the context pinned until saved is:

 - On context activation, we preallocate a node for each physical engine
   the context may operate on. This is to avoid allocations during
   unpinning, which may be from inside FS_RECLAIM context (aka the
   shrinker)

 - On context deactivation on retirement of the last active request (which
   is before we know the context has been saved), we add the
   preallocated node onto a barrier list on each engine

 - On engine idling, we emit a switch to kernel context. When this
   switch completes, we know that all previous contexts must have been
   saved, and so on retiring this request we can finally unpin all the
   contexts that were marked as deactivated prior to the switch.

We can enhance this in future by flushing all the idle contexts on a
regular heartbeat pulse of a switch to kernel context, which will also
be used to check for hung engines.

v2: intel_context_active_acquire/_release

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190614164606.15633-1-chris@chris-wilson.co.uk
  • Loading branch information
Chris Wilson committed Jun 14, 2019
1 parent 58a111f commit ce476c8
Show file tree
Hide file tree
Showing 20 changed files with 219 additions and 195 deletions.
24 changes: 6 additions & 18 deletions drivers/gpu/drm/i915/gem/i915_gem_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -692,17 +692,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
return 0;
}

/*
 * i915_gem_contexts_lost - notify all engines that GPU context state is lost
 * @dev_priv: i915 device private
 *
 * Called when we can no longer trust the logical state held on the GPU
 * (e.g. across reset or suspend). Walks every engine and drops its
 * reference to the last retired context via intel_engine_lost_context(),
 * so the next use reloads state from scratch.
 *
 * Caller must hold dev_priv->drm.struct_mutex (asserted below).
 */
void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;

lockdep_assert_held(&dev_priv->drm.struct_mutex);

for_each_engine(engine, dev_priv, id)
intel_engine_lost_context(engine);
}

void i915_gem_contexts_fini(struct drm_i915_private *i915)
{
lockdep_assert_held(&i915->drm.struct_mutex);
Expand Down Expand Up @@ -1203,20 +1192,19 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
if (ret)
goto out_add;

ret = gen8_emit_rpcs_config(rq, ce, sseu);
if (ret)
goto out_add;

/*
* Guarantee context image and the timeline remains pinned until the
* modifying request is retired by setting the ce activity tracker.
*
* But we only need to take one pin on the account of it. Or in other
* words transfer the pinned ce object to tracked active request.
*/
if (!i915_active_request_isset(&ce->active_tracker))
__intel_context_pin(ce);
__i915_active_request_set(&ce->active_tracker, rq);
GEM_BUG_ON(i915_active_is_idle(&ce->active));
ret = i915_active_ref(&ce->active, rq->fence.context, rq);
if (ret)
goto out_add;

ret = gen8_emit_rpcs_config(rq, ce, sseu);

out_add:
i915_request_add(rq);
Expand Down
1 change: 0 additions & 1 deletion drivers/gpu/drm/i915/gem/i915_gem_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)

/* i915_gem_context.c */
int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);

int i915_gem_context_open(struct drm_i915_private *i915,
Expand Down
20 changes: 19 additions & 1 deletion drivers/gpu/drm/i915/gem/i915_gem_pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,33 @@
#include "i915_drv.h"
#include "i915_globals.h"

/*
 * call_idle_barriers - flush deferred context-unpin callbacks on an engine
 * @engine: the engine whose barrier_tasks list is drained
 *
 * Atomically takes ownership of the whole barrier list with
 * llist_del_all() and invokes each node's retire() callback by hand,
 * with a NULL request, as if the tracked request had completed.
 * Used while parking the engine to clean up after wedging (see caller),
 * when no kernel-context switch will run the barriers for us.
 */
static void call_idle_barriers(struct intel_engine_cs *engine)
{
struct llist_node *node, *next;

llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
/*
 * The llist_node aliases the i915_active_request's link
 * list_head (hence the cast); reset both the link and the
 * request pointer to a benign state before invoking retire().
 */
struct i915_active_request *active =
container_of((struct list_head *)node,
typeof(*active), link);

INIT_LIST_HEAD(&active->link);
RCU_INIT_POINTER(active->request, NULL);

active->retire(active, NULL);
}
}

static void i915_gem_park(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;

lockdep_assert_held(&i915->drm.struct_mutex);

for_each_engine(engine, i915, id)
for_each_engine(engine, i915, id) {
call_idle_barriers(engine); /* cleanup after wedging */
i915_gem_batch_pool_fini(&engine->batch_pool);
}

i915_timelines_park(i915);
i915_vma_parked(i915);
Expand Down
17 changes: 6 additions & 11 deletions drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,18 +160,13 @@ i915_gem_shrink(struct drm_i915_private *i915,
return 0;

/*
* When shrinking the active list, also consider active contexts.
* Active contexts are pinned until they are retired, and so can
* not be simply unbound to retire and unpin their pages. To shrink
* the contexts, we must wait until the gpu is idle.
*
* We don't care about errors here; if we cannot wait upon the GPU,
* we will free as much as we can and hope to get a second chance.
* When shrinking the active list, we should also consider active
* contexts. Active contexts are pinned until they are retired, and
* so can not be simply unbound to retire and unpin their pages. To
* shrink the contexts, we must wait until the gpu is idle and
* completed its switch to the kernel context. In short, we do
* not have a good mechanism for idling a specific context.
*/
if (shrink & I915_SHRINK_ACTIVE)
i915_gem_wait_for_idle(i915,
I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);

trace_i915_gem_shrink(i915, target, shrink);
i915_retire_requests(i915);
Expand Down
80 changes: 71 additions & 9 deletions drivers/gpu/drm/i915/gt/intel_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ int __intel_context_do_pin(struct intel_context *ce)

i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */

intel_context_get(ce);
smp_mb__before_atomic(); /* flush pin before it is visible */
}

Expand Down Expand Up @@ -89,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce)
ce->ops->unpin(ce);

i915_gem_context_put(ce->gem_context);
intel_context_put(ce);
intel_context_active_release(ce);
}

mutex_unlock(&ce->pin_mutex);
intel_context_put(ce);
}

static void intel_context_retire(struct i915_active_request *active,
struct i915_request *rq)
static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
{
struct intel_context *ce =
container_of(active, typeof(*ce), active_tracker);
int err;

intel_context_unpin(ce);
err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
if (err)
return err;

/*
* And mark it as a globally pinned object to let the shrinker know
* it cannot reclaim the object until we release it.
*/
vma->obj->pin_global++;
vma->obj->mm.dirty = true;

return 0;
}

/*
 * Undo __context_pin_state(): drop the pin_global count exposed to the
 * shrinker, then release the vma pin taken when the context was activated.
 */
static void __context_unpin_state(struct i915_vma *vma)
{
vma->obj->pin_global--;
__i915_vma_unpin(vma);
}

/*
 * Retirement callback for ce->active: runs once the activity tracker
 * idles, i.e. the last tracked request (and, via the barrier nodes, the
 * subsequent kernel-context switch) has completed. Only then is it safe
 * to unpin the context image and drop the reference taken in
 * intel_context_active_acquire().
 */
static void intel_context_retire(struct i915_active *active)
{
struct intel_context *ce = container_of(active, typeof(*ce), active);

/* Contexts without a state object took no pin at activation. */
if (ce->state)
__context_unpin_state(ce->state);

intel_context_put(ce);
}

void
Expand All @@ -125,8 +149,46 @@ intel_context_init(struct intel_context *ce,

mutex_init(&ce->pin_mutex);

i915_active_request_init(&ce->active_tracker,
NULL, intel_context_retire);
i915_active_init(ctx->i915, &ce->active, intel_context_retire);
}

/*
 * intel_context_active_acquire - first-use setup of a context's activity
 * tracker
 * @ce: the intel_context being pinned
 * @flags: pin flags forwarded to i915_vma_pin() for the context state
 *
 * On the first acquire of ce->active we take a reference on @ce and pin
 * its state vma so the image stays resident while the GPU may still write
 * into it; both are undone by intel_context_retire() when the tracker
 * idles. Returns 0 on success or a negative error code.
 */
int intel_context_active_acquire(struct intel_context *ce, unsigned long flags)
{
int err;

/* Only the first acquire performs the one-off setup below. */
if (!i915_active_acquire(&ce->active))
return 0;

/* Balanced by intel_context_put() in intel_context_retire(). */
intel_context_get(ce);

/* No state object, nothing to pin (e.g. ringbuffer submission). */
if (!ce->state)
return 0;

err = __context_pin_state(ce->state, flags);
if (err) {
/*
 * Unwind manually: cancel the acquire (presumably skipping the
 * retire callback — verify against i915_active) and drop the
 * reference we just took; nothing was pinned.
 */
i915_active_cancel(&ce->active);
intel_context_put(ce);
return err;
}

/*
 * Preallocate barrier tracking nodes for each physical engine, so
 * that deactivation — which may run under FS_RECLAIM, i.e. from the
 * shrinker — needs no allocations. The kernel context is exempt as
 * it is the one used to flush the barriers.
 */
if (!i915_gem_context_is_kernel(ce->gem_context)) {
err = i915_active_acquire_preallocate_barrier(&ce->active,
ce->engine);
if (err) {
/* Drop the acquire; retire will unpin and put @ce. */
i915_active_release(&ce->active);
return err;
}
}

return 0;
}

/*
 * intel_context_active_release - drop the activity reference on ce->active
 * @ce: the intel_context being unpinned
 *
 * Queues the preallocated barrier nodes onto their engines, deferring the
 * final unpin until after the next kernel-context switch completes, then
 * releases the reference taken in intel_context_active_acquire().
 */
void intel_context_active_release(struct intel_context *ce)
{
/* Nodes preallocated in intel_context_active_acquire() */
i915_active_acquire_barrier(&ce->active);
i915_active_release(&ce->active);
}

static void i915_global_context_shrink(void)
Expand Down
3 changes: 3 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ static inline void intel_context_exit(struct intel_context *ce)
ce->ops->exit(ce);
}

int intel_context_active_acquire(struct intel_context *ce, unsigned long flags);
void intel_context_active_release(struct intel_context *ce);

static inline struct intel_context *intel_context_get(struct intel_context *ce)
{
kref_get(&ce->ref);
Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/i915/gt/intel_context_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ struct intel_context {
intel_engine_mask_t saturated; /* submitting semaphores too late? */

/**
* active_tracker: Active tracker for the external rq activity
* on this intel_context object.
* active: Active tracker for the rq activity (inc. external) on this
* intel_context object.
*/
struct i915_active_request active_tracker;
struct i915_active active;

const struct intel_context_ops *ops;

Expand Down
2 changes: 0 additions & 2 deletions drivers/gpu/drm/i915/gt/intel_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,6 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

void intel_engine_lost_context(struct intel_engine_cs *engine);

void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

Expand Down
23 changes: 3 additions & 20 deletions drivers/gpu/drm/i915/gt/intel_engine_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,8 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
{
int err;

init_llist_head(&engine->barrier_tasks);

err = init_status_page(engine);
if (err)
return err;
Expand Down Expand Up @@ -870,6 +872,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
if (engine->preempt_context)
intel_context_unpin(engine->preempt_context);
intel_context_unpin(engine->kernel_context);
GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));

i915_timeline_fini(&engine->timeline);

Expand Down Expand Up @@ -1201,26 +1204,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
engine->set_default_submission(engine);
}

/**
 * intel_engine_lost_context: called when the GPU is reset into unknown state
 * @engine: the engine
 *
 * We have either reset the GPU or are otherwise about to lose state tracking
 * of the current GPU logical state (e.g. suspend). On next use, it is
 * therefore imperative that we make no presumptions about the current state
 * and load from scratch.
 */
void intel_engine_lost_context(struct intel_engine_cs *engine)
{
struct intel_context *ce;

lockdep_assert_held(&engine->i915->drm.struct_mutex);

/* Drop the pin kept for the last context to execute; it is now stale. */
ce = fetch_and_zero(&engine->last_retired_context);
if (ce)
intel_context_unpin(ce);
}

bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
{
switch (INTEL_GEN(engine->i915)) {
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_engine_pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)

/* Check again on the next retirement. */
engine->wakeref_serial = engine->serial + 1;

i915_request_add_barriers(rq);
__i915_request_commit(rq);

return false;
Expand Down
13 changes: 2 additions & 11 deletions drivers/gpu/drm/i915/gt/intel_engine_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <linux/types.h>

#include "i915_gem.h"
Expand Down Expand Up @@ -288,6 +289,7 @@ struct intel_engine_cs {
struct intel_ring *buffer;

struct i915_timeline timeline;
struct llist_head barrier_tasks;

struct intel_context *kernel_context; /* pinned */
struct intel_context *preempt_context; /* pinned; optional */
Expand Down Expand Up @@ -435,17 +437,6 @@ struct intel_engine_cs {

struct intel_engine_execlists execlists;

/* Contexts are pinned whilst they are active on the GPU. The last
* context executed remains active whilst the GPU is idle - the
* switch away and write to the context object only occurs on the
* next execution. Contexts are only unpinned on retirement of the
* following request ensuring that we can always write to the object
* on the context switch even after idling. Across suspend, we switch
* to the kernel context and trash it as the save may not happen
* before the hardware is powered down.
*/
struct intel_context *last_retired_context;

/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;

Expand Down
Loading

0 comments on commit ce476c8

Please sign in to comment.