Skip to content

Commit

Permalink
drm/i915: Keep a global seqno per-engine
Browse files Browse the repository at this point in the history
Replace the global device seqno with one for each engine, and account
for in-flight seqno on each separately. This is consistent with
dma-fence as each timeline has separate fence-contexts for each engine
and a seqno is only ordered within a fence-context (i.e.  seqno do not
need to be ordered wrt to other engines, just ordered within a single
engine). This is required to enable request rewinding for preemption on
individual engines (we have to rewind the global seqno to avoid
overflow, and we do not have to rewind all engines just to preempt one.)

v2: Rename active_seqno to inflight_seqnos to more clearly indicate that
it is a counter and not equivalent to the existing seqno. Update
functions that operated on active_seqno similarly.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-3-chris@chris-wilson.co.uk
  • Loading branch information
Chris Wilson committed Feb 23, 2017
1 parent 309663a commit 9b6586a
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 80 deletions.
11 changes: 1 addition & 10 deletions drivers/gpu/drm/i915/i915_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1079,15 +1079,6 @@ static const struct file_operations i915_error_state_fops = {
};
#endif

static int
i915_next_seqno_get(void *data, u64 *val)
{
struct drm_i915_private *dev_priv = data;

*val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
return 0;
}

static int
i915_next_seqno_set(void *data, u64 val)
{
Expand All @@ -1106,7 +1097,7 @@ i915_next_seqno_set(void *data, u64 val)
}

DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
i915_next_seqno_get, i915_next_seqno_set,
NULL, i915_next_seqno_set,
"0x%llx\n");

static int i915_frequency_info(struct seq_file *m, void *unused)
Expand Down
83 changes: 45 additions & 38 deletions drivers/gpu/drm/i915/i915_gem_request.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,12 @@ i915_priotree_init(struct i915_priotree *pt)
pt->priority = INT_MIN;
}

static void unreserve_seqno(struct intel_engine_cs *engine)
{
GEM_BUG_ON(!engine->timeline->inflight_seqnos);
engine->timeline->inflight_seqnos--;
}

void i915_gem_retire_noop(struct i915_gem_active *active,
struct drm_i915_gem_request *request)
{
Expand Down Expand Up @@ -237,6 +243,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
&request->i915->gt.idle_work,
msecs_to_jiffies(100));
}
unreserve_seqno(request->engine);

/* Walk through the active list, calling retire on each. This allows
* objects to track their GPU activity and mark themselves as idle
Expand Down Expand Up @@ -307,7 +314,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
} while (tmp != req);
}

static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
{
struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
struct intel_engine_cs *engine;
Expand All @@ -325,15 +332,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
GEM_BUG_ON(i915->gt.active_requests > 1);

/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
while (intel_breadcrumbs_busy(i915))
cond_resched(); /* spin until threads are complete */
}
atomic_set(&timeline->seqno, seqno);
for_each_engine(engine, i915, id) {
struct intel_timeline *tl = &timeline->engine[id];

/* Finally reset hw state */
for_each_engine(engine, i915, id)
if (!i915_seqno_passed(seqno, tl->seqno)) {
/* spin until threads are complete */
while (intel_breadcrumbs_busy(engine))
cond_resched();
}

/* Finally reset hw state */
tl->seqno = seqno;
intel_engine_init_global_seqno(engine, seqno);
}

list_for_each_entry(timeline, &i915->gt.timelines, link) {
for_each_engine(engine, i915, id) {
Expand All @@ -358,37 +369,38 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
/* HWS page needs to be set less than what we
* will inject to ring
*/
return i915_gem_init_global_seqno(dev_priv, seqno - 1);
return reset_all_global_seqno(dev_priv, seqno - 1);
}

static int reserve_global_seqno(struct drm_i915_private *i915)
static int reserve_seqno(struct intel_engine_cs *engine)
{
u32 active_requests = ++i915->gt.active_requests;
u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
u32 active = ++engine->timeline->inflight_seqnos;
u32 seqno = engine->timeline->seqno;
int ret;

/* Reservation is fine until we need to wrap around */
if (likely(seqno + active_requests > seqno))
if (likely(!add_overflows(seqno, active)))
return 0;

ret = i915_gem_init_global_seqno(i915, 0);
/* Even though we are tracking inflight seqno individually on each
* engine, other engines may be observing us using hw semaphores and
* so we need to idle all engines before wrapping around this engine.
* As all engines are then idle, we can reset the seqno on all, so
* we don't stall in quick succession if each engine is being
* similarly utilized.
*/
ret = reset_all_global_seqno(engine->i915, 0);
if (ret) {
i915->gt.active_requests--;
engine->timeline->inflight_seqnos--;
return ret;
}

return 0;
}

static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
{
/* seqno only incremented under a mutex */
return ++tl->seqno.counter;
}

static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
static u32 timeline_get_seqno(struct intel_timeline *tl)
{
return atomic_inc_return(&tl->seqno);
return ++tl->seqno;
}

void __i915_gem_request_submit(struct drm_i915_gem_request *request)
Expand All @@ -402,14 +414,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
GEM_BUG_ON(timeline == request->timeline);
assert_spin_locked(&timeline->lock);

seqno = timeline_get_seqno(timeline->common);
seqno = timeline_get_seqno(timeline);
GEM_BUG_ON(!seqno);
GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));

GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
request->previous_seqno = timeline->last_submitted_seqno;
timeline->last_submitted_seqno = seqno;

/* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
request->global_seqno = seqno;
Expand Down Expand Up @@ -516,7 +524,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
if (ret)
return ERR_PTR(ret);

ret = reserve_global_seqno(dev_priv);
ret = reserve_seqno(engine);
if (ret)
goto err_unpin;

Expand Down Expand Up @@ -568,7 +576,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
&i915_fence_ops,
&req->lock,
req->timeline->fence_context,
__timeline_get_seqno(req->timeline->common));
timeline_get_seqno(req->timeline));

/* We bump the ref for the fence chain */
i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
Expand Down Expand Up @@ -613,6 +621,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
*/
req->head = req->ring->tail;

/* Check that we didn't interrupt ourselves with a new request */
GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
return req;

err_ctx:
Expand All @@ -623,7 +633,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,

kmem_cache_free(dev_priv->requests, req);
err_unreserve:
dev_priv->gt.active_requests--;
unreserve_seqno(engine);
err_unpin:
engine->context_unpin(engine, ctx);
return ERR_PTR(ret);
Expand Down Expand Up @@ -836,8 +846,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
* our i915_gem_request_alloc() and called __i915_add_request() before
* us, the timeline will hold its seqno which is later than ours.
*/
GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
request->fence.seqno));
GEM_BUG_ON(timeline->seqno != request->fence.seqno);

/*
* To ensure that this call will not fail, space for its emissions
Expand Down Expand Up @@ -891,16 +900,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
list_add_tail(&request->link, &timeline->requests);
spin_unlock_irq(&timeline->lock);

GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
request->fence.seqno));

timeline->last_submitted_seqno = request->fence.seqno;
GEM_BUG_ON(timeline->seqno != request->fence.seqno);
i915_gem_active_set(&timeline->last_request, request);

list_add_tail(&request->ring_link, &ring->request_list);
request->emitted_jiffies = jiffies;

i915_gem_mark_busy(engine);
if (!request->i915->gt.active_requests++)
i915_gem_mark_busy(engine);

/* Let the backend know a new request has arrived that may need
* to adjust the existing execution schedule due to a high priority
Expand Down
8 changes: 1 addition & 7 deletions drivers/gpu/drm/i915/i915_gem_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,6 @@ struct drm_i915_gem_request {

u32 global_seqno;

/** GEM sequence number associated with the previous request,
* when the HWS breadcrumb is equal to this the GPU is processing
* this request.
*/
u32 previous_seqno;

/** Position in the ring of the start of the request */
u32 head;

Expand Down Expand Up @@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req)
{
GEM_BUG_ON(!req->global_seqno);
return i915_seqno_passed(intel_engine_get_seqno(req->engine),
req->previous_seqno);
req->global_seqno - 1);
}

static inline bool
Expand Down
9 changes: 7 additions & 2 deletions drivers/gpu/drm/i915/i915_gem_timeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,13 @@ struct i915_gem_timeline;

struct intel_timeline {
u64 fence_context;
u32 last_submitted_seqno;
u32 seqno;

/**
* Count of outstanding requests, from the time they are constructed
* to the moment they are retired. Loosely coupled to hardware.
*/
u32 inflight_seqnos;

spinlock_t lock;

Expand All @@ -56,7 +62,6 @@ struct intel_timeline {

struct i915_gem_timeline {
struct list_head link;
atomic_t seqno;

struct drm_i915_private *i915;
const char *name;
Expand Down
33 changes: 14 additions & 19 deletions drivers/gpu/drm/i915/intel_breadcrumbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -676,31 +676,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
cancel_fake_irq(engine);
}

unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915)
bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
unsigned int mask = 0;

for_each_engine(engine, i915, id) {
struct intel_breadcrumbs *b = &engine->breadcrumbs;

spin_lock_irq(&b->lock);
struct intel_breadcrumbs *b = &engine->breadcrumbs;
bool busy = false;

if (b->first_wait) {
wake_up_process(b->first_wait->tsk);
mask |= intel_engine_flag(engine);
}
spin_lock_irq(&b->lock);

if (b->first_signal) {
wake_up_process(b->signaler);
mask |= intel_engine_flag(engine);
}
if (b->first_wait) {
wake_up_process(b->first_wait->tsk);
busy |= intel_engine_flag(engine);
}

spin_unlock_irq(&b->lock);
if (b->first_signal) {
wake_up_process(b->signaler);
busy |= intel_engine_flag(engine);
}

return mask;
spin_unlock_irq(&b->lock);

return busy;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
Expand Down
2 changes: 0 additions & 2 deletions drivers/gpu/drm/i915/intel_engine_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
engine->irq_seqno_barrier(engine);

GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
engine->timeline->last_submitted_seqno = seqno;

engine->hangcheck.seqno = seqno;

/* After manually advancing the seqno, fake the interrupt in case
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/i915/intel_ringbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
* wtih serialising this hint with anything, so document it as
* a hint and nothing more.
*/
return READ_ONCE(engine->timeline->last_submitted_seqno);
return READ_ONCE(engine->timeline->seqno);
}

int init_workarounds_ring(struct intel_engine_cs *engine);
Expand Down Expand Up @@ -630,7 +630,7 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)

void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);

static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
Expand Down

0 comments on commit 9b6586a

Please sign in to comment.