Skip to content

Commit

Permalink
drm/i915: Mark i915_request.timeline as a volatile, rcu pointer
Browse files Browse the repository at this point in the history
The request->timeline is only valid until the request is retired (i.e.
before it is completed). Upon retiring the request, the context may be
unpinned and freed, and along with it the timeline may be freed. We
therefore need to be very careful when chasing rq->timeline that the
pointer does not disappear beneath us. The vast majority of users are in
a protected context, either during request construction or retirement,
where the timeline->mutex is held and the timeline cannot disappear. It
is those few off the beaten path (where we access a second timeline) that
need extra scrutiny -- to be added in the next patch after first adding
the warnings about dangerous access.

One complication, where we cannot use the timeline->mutex itself, is
during request submission onto hardware (under spinlocks). Here, we want
to check on the timeline to finalize the breadcrumb, and so we need to
impose a second rule to ensure that the request->timeline is indeed
valid. As we are submitting the request, it's context and timeline must
be pinned, as it will be used by the hardware. Since it is pinned, we
know the request->timeline must still be valid, and we cannot submit the
idle barrier until after we release the engine->active.lock, ergo while
submitting and holding that spinlock, a second thread cannot release the
timeline.

v2: Don't be lazy inside selftests; hold the timeline->mutex for as long
as we need it, and tidy up acquiring the timeline with a bit of
refactoring (i915_active_add_request)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190919111912.21631-1-chris@chris-wilson.co.uk
  • Loading branch information
Chris Wilson committed Sep 20, 2019
1 parent c45e788 commit d19d71f
Show file tree
Hide file tree
Showing 20 changed files with 147 additions and 63 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/display/intel_overlay.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ alloc_request(struct intel_overlay *overlay, void (*fn)(struct intel_overlay *))
if (IS_ERR(rq))
return rq;

err = i915_active_ref(&overlay->last_flip, rq->timeline, rq);
err = i915_active_add_request(&overlay->last_flip, rq);
if (err) {
i915_request_add(rq);
return ERR_PTR(err);
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ static void clear_pages_worker(struct work_struct *work)
* keep track of the GPU activity within this vma/request, and
* propagate the signal from the request to w->dma.
*/
err = i915_active_ref(&vma->active, rq->timeline, rq);
err = i915_active_add_request(&vma->active, rq);
if (err)
goto out_request;

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gem/i915_gem_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -910,7 +910,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
if (emit)
err = emit(rq, data);
if (err == 0)
err = i915_active_ref(&cb->base, rq->timeline, rq);
err = i915_active_add_request(&cb->base, rq);

i915_request_add(rq);
if (err)
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/i915/gt/intel_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
/* Only suitable for use in remotely modifying this context */
GEM_BUG_ON(rq->hw_context == ce);

if (rq->timeline != tl) { /* beware timeline sharing */
if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
err = mutex_lock_interruptible_nested(&tl->mutex,
SINGLE_DEPTH_NESTING);
if (err)
Expand All @@ -319,7 +319,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
* words transfer the pinned ce object to tracked active request.
*/
GEM_BUG_ON(i915_active_is_idle(&ce->active));
return i915_active_ref(&ce->active, rq->timeline, rq);
return i915_active_add_request(&ce->active, rq);
}

struct i915_request *intel_context_create_request(struct intel_context *ce)
Expand Down
57 changes: 50 additions & 7 deletions drivers/gpu/drm/i915/gt/intel_engine_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,8 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
engine->status_page.vma))
goto out_frame;

mutex_lock(&frame->timeline.mutex);

frame->ring.vaddr = frame->cs;
frame->ring.size = sizeof(frame->cs);
frame->ring.effective_size = frame->ring.size;
Expand All @@ -688,18 +690,22 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
frame->rq.i915 = engine->i915;
frame->rq.engine = engine;
frame->rq.ring = &frame->ring;
frame->rq.timeline = &frame->timeline;
rcu_assign_pointer(frame->rq.timeline, &frame->timeline);

dw = intel_timeline_pin(&frame->timeline);
if (dw < 0)
goto out_timeline;

spin_lock_irq(&engine->active.lock);
dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
spin_unlock_irq(&engine->active.lock);

GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */

intel_timeline_unpin(&frame->timeline);

out_timeline:
mutex_unlock(&frame->timeline.mutex);
intel_timeline_fini(&frame->timeline);
out_frame:
kfree(frame);
Expand Down Expand Up @@ -1196,6 +1202,27 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
}
}

static struct intel_timeline *get_timeline(struct i915_request *rq)
{
struct intel_timeline *tl;

/*
* Even though we are holding the engine->active.lock here, there
* is no control over the submission queue per-se and we are
* inspecting the active state at a random point in time, with an
* unknown queue. Play safe and make sure the timeline remains valid.
* (Only being used for pretty printing, one extra kref shouldn't
* cause a camel stampede!)
*/
rcu_read_lock();
tl = rcu_dereference(rq->timeline);
if (!kref_get_unless_zero(&tl->kref))
tl = NULL;
rcu_read_unlock();

return tl;
}

static void intel_engine_print_registers(struct intel_engine_cs *engine,
struct drm_printer *m)
{
Expand Down Expand Up @@ -1290,27 +1317,37 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
int len;

len = snprintf(hdr, sizeof(hdr),
"\t\tActive[%d: ",
"\t\tActive[%d]: ",
(int)(port - execlists->active));
if (!i915_request_signaled(rq))
if (!i915_request_signaled(rq)) {
struct intel_timeline *tl = get_timeline(rq);

len += snprintf(hdr + len, sizeof(hdr) - len,
"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
i915_ggtt_offset(rq->ring->vma),
rq->timeline->hwsp_offset,
tl ? tl->hwsp_offset : 0,
hwsp_seqno(rq));

if (tl)
intel_timeline_put(tl);
}
snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr);
}
for (port = execlists->pending; (rq = *port); port++) {
struct intel_timeline *tl = get_timeline(rq);
char hdr[80];

snprintf(hdr, sizeof(hdr),
"\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
(int)(port - execlists->pending),
i915_ggtt_offset(rq->ring->vma),
rq->timeline->hwsp_offset,
tl ? tl->hwsp_offset : 0,
hwsp_seqno(rq));
print_request(m, rq, hdr);

if (tl)
intel_timeline_put(tl);
}
spin_unlock_irqrestore(&engine->active.lock, flags);
} else if (INTEL_GEN(dev_priv) > 6) {
Expand Down Expand Up @@ -1388,6 +1425,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_lock_irqsave(&engine->active.lock, flags);
rq = intel_engine_find_active_request(engine);
if (rq) {
struct intel_timeline *tl = get_timeline(rq);

print_request(m, rq, "\t\tactive ");

drm_printf(m, "\t\tring->start: 0x%08x\n",
Expand All @@ -1400,8 +1439,12 @@ void intel_engine_dump(struct intel_engine_cs *engine,
rq->ring->emit);
drm_printf(m, "\t\tring->space: 0x%08x\n",
rq->ring->space);
drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
rq->timeline->hwsp_offset);

if (tl) {
drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
tl->hwsp_offset);
intel_timeline_put(tl);
}

print_request_ring(m, rq);

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gt/intel_engine_pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
/* Context switch failed, hope for the best! Maybe reset? */
goto out_unlock;

intel_timeline_enter(rq->timeline);
intel_timeline_enter(i915_request_timeline(rq));

/* Check again on the next retirement. */
engine->wakeref_serial = engine->serial + 1;
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gt/intel_engine_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ static inline int
intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
struct i915_request *rq)
{
return i915_active_ref(&node->active, rq->timeline, rq);
return i915_active_add_request(&node->active, rq);
}

static inline void
Expand Down
17 changes: 9 additions & 8 deletions drivers/gpu/drm/i915/gt/intel_lrc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1825,7 +1825,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;

GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);

cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
Expand All @@ -1841,7 +1841,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
*cs++ = MI_NOOP;

*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = rq->timeline->hwsp_offset;
*cs++ = i915_request_timeline(rq)->hwsp_offset;
*cs++ = 0;
*cs++ = rq->fence.seqno - 1;

Expand Down Expand Up @@ -2324,7 +2324,8 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)

static struct i915_request *active_request(struct i915_request *rq)
{
const struct list_head * const list = &rq->timeline->requests;
const struct list_head * const list =
&i915_request_active_timeline(rq)->requests;
const struct intel_context * const ce = rq->hw_context;
struct i915_request *active = NULL;

Expand Down Expand Up @@ -2927,7 +2928,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write(cs,
request->fence.seqno,
request->timeline->hwsp_offset,
i915_request_active_timeline(request)->hwsp_offset,
0);

return gen8_emit_fini_breadcrumb_footer(request, cs);
Expand All @@ -2944,7 +2945,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
request->timeline->hwsp_offset,
i915_request_active_timeline(request)->hwsp_offset,
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);

Expand All @@ -2956,7 +2957,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
request->timeline->hwsp_offset,
i915_request_active_timeline(request)->hwsp_offset,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
Expand Down Expand Up @@ -3020,7 +3021,7 @@ static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write(cs,
request->fence.seqno,
request->timeline->hwsp_offset,
i915_request_active_timeline(request)->hwsp_offset,
0);

return gen12_emit_fini_breadcrumb_footer(request, cs);
Expand All @@ -3031,7 +3032,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
request->timeline->hwsp_offset,
i915_request_active_timeline(request)->hwsp_offset,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
Expand Down
27 changes: 15 additions & 12 deletions drivers/gpu/drm/i915/gt/intel_ringbuffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_CS_STALL);
*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
*cs++ = i915_request_active_timeline(rq)->hwsp_offset |
PIPE_CONTROL_GLOBAL_GTT;
*cs++ = rq->fence.seqno;

*cs++ = MI_USER_INTERRUPT;
Expand Down Expand Up @@ -425,7 +426,7 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL);
*cs++ = rq->timeline->hwsp_offset;
*cs++ = i915_request_active_timeline(rq)->hwsp_offset;
*cs++ = rq->fence.seqno;

*cs++ = MI_USER_INTERRUPT;
Expand All @@ -439,8 +440,8 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)

static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);

*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
Expand All @@ -459,8 +460,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;

GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);

*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
Expand Down Expand Up @@ -938,8 +939,8 @@ static void i9xx_submit_request(struct i915_request *request)

static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);

*cs++ = MI_FLUSH;

Expand All @@ -961,8 +962,8 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;

GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);

*cs++ = MI_FLUSH;

Expand Down Expand Up @@ -1815,7 +1816,7 @@ static int ring_request_alloc(struct i915_request *request)
int ret;

GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);

/*
* Flush enough space to reduce the likelihood of waiting after
Expand Down Expand Up @@ -1926,7 +1927,9 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
*/
GEM_BUG_ON(!rq->reserved_space);

ret = wait_for_space(ring, rq->timeline, total_bytes);
ret = wait_for_space(ring,
i915_request_timeline(rq),
total_bytes);
if (unlikely(ret))
return ERR_PTR(ret);
}
Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/i915/gt/intel_timeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
static int cacheline_ref(struct intel_timeline_cacheline *cl,
struct i915_request *rq)
{
return i915_active_ref(&cl->active, rq->timeline, rq);
return i915_active_add_request(&cl->active, rq);
}

int intel_timeline_read_hwsp(struct i915_request *from,
Expand All @@ -504,7 +504,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
struct intel_timeline *tl = from->timeline;
int err;

GEM_BUG_ON(to->timeline == tl);
GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl);

mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
err = i915_request_completed(from);
Expand Down Expand Up @@ -541,7 +541,7 @@ void __intel_timeline_free(struct kref *kref)
container_of(kref, typeof(*timeline), kref);

intel_timeline_fini(timeline);
kfree(timeline);
kfree_rcu(timeline, rcu);
}

static void timelines_fini(struct intel_gt *gt)
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/gt/intel_timeline_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ struct intel_timeline {
struct intel_gt *gt;

struct kref kref;
struct rcu_head rcu;
};

#endif /* __I915_TIMELINE_TYPES_H__ */
Loading

0 comments on commit d19d71f

Please sign in to comment.