Skip to content

Commit

Permalink
drm/i915: Bump ready tasks ahead of busywaits
Browse files Browse the repository at this point in the history
Consider two tasks that are running in parallel on a pair of engines
(vcs0, vcs1), but then must complete on a shared engine (rcs0). To
maximise throughput, we want to run the first ready task on rcs0 (i.e.
the first task that completes on either of vcs0 or vcs1). When using
semaphores, however, we will instead queue onto rcs0 in submission order.

To resolve this incorrect ordering, we want to re-evaluate the priority
queue when each of the requests becomes ready. Normally this happens because
we only insert into the priority queue requests that are ready, but with
semaphores we are inserting ahead of their readiness and to compensate
we penalize those tasks with reduced priority (so that tasks that do not
need to busywait should naturally be run first). However, given a series
of tasks that each use semaphores, the queue degrades into submission
fifo rather than readiness fifo, and so to counter this we give a small
boost to semaphore users as their dependent tasks are completed (and so
we no longer require any busywait prior to running the user task as they
are then ready themselves).

v2: Fixup irqsave for schedule_lock (Tvrtko)

Testcase: igt/gem_exec_schedule/semaphore-codependency
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Dmitry Ermilov <dmitry.ermilov@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190409152922.23894-1-chris@chris-wilson.co.uk
  • Loading branch information
Chris Wilson committed Apr 11, 2019
1 parent 9726920 commit b7404c7
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 10 deletions.
40 changes: 40 additions & 0 deletions drivers/gpu/drm/i915/i915_request.c
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,36 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
return NOTIFY_DONE;
}

/*
 * Notification callback for the per-request "semaphore" sw_fence.
 *
 * @fence: the i915_sw_fence embedded as the semaphore member of a request
 * @state: which fence event is being reported
 *
 * Always returns NOTIFY_DONE.
 */
static int __i915_sw_fence_call
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *rq =
		container_of(fence, struct i915_request, semaphore);

	if (state == FENCE_COMPLETE) {
		/*
		 * Only a small portion of the dependencies is tracked by
		 * this fence, so we cannot promise that no semaphore chain
		 * remains anywhere. Rather than the full NOSEMAPHORE boost,
		 * apply the smaller (yet still preempting) NEWCLIENT boost:
		 * that is enough to get ahead of a busywaiting request
		 * (which cannot itself be NEWCLIENT) without accidentally
		 * lifting a busywait above real work elsewhere.
		 */
		i915_schedule_bump_priority(rq, I915_PRIORITY_NEWCLIENT);
	} else if (state == FENCE_FREE) {
		/* Release the request reference held by this fence. */
		i915_request_put(rq);
	}

	return NOTIFY_DONE;
}

static void ring_retire_requests(struct intel_ring *ring)
{
struct i915_request *rq, *rn;
Expand Down Expand Up @@ -702,6 +732,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)

/* We bump the ref for the fence chain */
i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);

i915_sched_node_init(&rq->sched);

Expand Down Expand Up @@ -784,6 +815,12 @@ emit_semaphore_wait(struct i915_request *to,
&from->fence, 0,
I915_FENCE_GFP);

err = i915_sw_fence_await_dma_fence(&to->semaphore,
&from->fence, 0,
I915_FENCE_GFP);
if (err < 0)
return err;

/* We need to pin the signaler's HWSP until we are finished reading. */
err = i915_timeline_read_hwsp(from, to, &hwsp_offset);
if (err)
Expand Down Expand Up @@ -1114,6 +1151,7 @@ void i915_request_add(struct i915_request *request)
* run at the earliest possible convenience.
*/
local_bh_disable();
i915_sw_fence_commit(&request->semaphore);
rcu_read_lock(); /* RCU serialisation for set-wedged protection */
if (engine->schedule) {
struct i915_sched_attr attr = request->gem_context->sched;
Expand Down Expand Up @@ -1320,7 +1358,9 @@ long i915_request_wait(struct i915_request *rq,
if (flags & I915_WAIT_PRIORITY) {
if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
gen6_rps_boost(rq);
local_bh_disable(); /* suspend tasklets for reprioritisation */
i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
local_bh_enable(); /* kick tasklets en masse */
}

wait.tsk = current;
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/i915_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ struct i915_request {
struct i915_sw_dma_fence_cb dmaq;
};
struct list_head execute_cb;
struct i915_sw_fence semaphore;

/*
* A list of everyone we wait upon, and everyone who waits upon us.
Expand Down
21 changes: 11 additions & 10 deletions drivers/gpu/drm/i915/i915_scheduler.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
{
bool ret = false;

spin_lock(&schedule_lock);
spin_lock_irq(&schedule_lock);

if (!node_signaled(signal)) {
INIT_LIST_HEAD(&dep->dfs_link);
Expand All @@ -81,7 +81,7 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
ret = true;
}

spin_unlock(&schedule_lock);
spin_unlock_irq(&schedule_lock);

return ret;
}
Expand All @@ -108,7 +108,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)

GEM_BUG_ON(!list_empty(&node->link));

spin_lock(&schedule_lock);
spin_lock_irq(&schedule_lock);

/*
* Everyone we depended upon (the fences we wait to be signaled)
Expand All @@ -135,7 +135,7 @@ void i915_sched_node_fini(struct i915_sched_node *node)
i915_dependency_free(dep);
}

spin_unlock(&schedule_lock);
spin_unlock_irq(&schedule_lock);
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
Expand Down Expand Up @@ -356,7 +356,7 @@ static void __i915_schedule(struct i915_request *rq,

memset(&cache, 0, sizeof(cache));
engine = rq->engine;
spin_lock_irq(&engine->timeline.lock);
spin_lock(&engine->timeline.lock);

/* Fifo and depth-first replacement ensure our deps execute before us */
list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
Expand Down Expand Up @@ -407,32 +407,33 @@ static void __i915_schedule(struct i915_request *rq,
tasklet_hi_schedule(&engine->execlists.tasklet);
}

spin_unlock_irq(&engine->timeline.lock);
spin_unlock(&engine->timeline.lock);
}

/*
 * Apply the scheduling attributes @attr to request @rq by calling
 * __i915_schedule() while holding schedule_lock.
 *
 * NOTE(review): this hunk is a rendered diff without +/- markers, so both
 * the plain spin_lock()/spin_unlock() pair and the spin_lock_irq()/
 * spin_unlock_irq() pair are shown. Presumably the _irq variants are the
 * ones that remain after the commit ("Fixup irqsave for schedule_lock");
 * confirm against the actual tree.
 */
void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
spin_lock(&schedule_lock);
spin_lock_irq(&schedule_lock);
__i915_schedule(rq, attr);
spin_unlock(&schedule_lock);
spin_unlock_irq(&schedule_lock);
}

/*
 * OR the priority bits in @bump into the current priority of @rq and
 * reschedule it through __i915_schedule() under schedule_lock.
 *
 * @rq:   request whose priority is to be raised
 * @bump: bits to set; must lie entirely within I915_PRIORITY_MASK
 *
 * Does nothing if the request's priority is still I915_PRIORITY_INVALID.
 *
 * NOTE(review): this hunk is a rendered diff without +/- markers, so both
 * the spin_lock_bh()/spin_unlock_bh() pair and the spin_lock_irqsave()/
 * spin_unlock_irqrestore() pair are shown. Presumably the irqsave variants
 * (which use the 'flags' local) are the ones that remain after the commit;
 * confirm against the actual tree.
 */
void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
{
struct i915_sched_attr attr;
unsigned long flags;

/* Callers may only pass bits covered by the priority mask. */
GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);

/* Unlocked peek: skip requests whose priority has not been set. */
if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
return;

spin_lock_bh(&schedule_lock);
spin_lock_irqsave(&schedule_lock, flags);

/* Copy the current attributes under the lock, then add the bump bits. */
attr = rq->sched.attr;
attr.priority |= bump;
__i915_schedule(rq, &attr);

spin_unlock_bh(&schedule_lock);
spin_unlock_irqrestore(&schedule_lock, flags);
}

void __i915_priolist_free(struct i915_priolist *p)
Expand Down

0 comments on commit b7404c7

Please sign in to comment.