drm/i915/execlists: Process one CSB update at a time
In the next patch, we will process the CSB events directly from the
submission path, rather than only after a CS interrupt. Hence, we will
no longer have the need for a loop until the has-interrupt bit is clear,
and in the meantime can remove that small optimisation.

v2: Tvrtko pointed out it was safer to unconditionally kick the tasklet
after each irq, when assuming that the tasklet is called for each irq.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-4-chris@chris-wilson.co.uk
Chris Wilson committed Jun 28, 2018
1 parent d8857d5 commit 8ea397f
Showing 2 changed files with 141 additions and 144 deletions.
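The behavioural change is easiest to see in isolation before reading the diffs. The sketch below is a minimal userspace model, not the driver code: irq_posted stands in for the ENGINE_IRQ_EXECLIST bit in engine->irq_posted, drain_once() for one walk of the CSB ring, and tasklet_old()/tasklet_new() for the shape of the tasklet body before and after this commit.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool irq_posted;		/* stands in for ENGINE_IRQ_EXECLIST */

static void drain_once(void)
{
	/* one walk of the CSB ring; see the intel_lrc.c hunk below */
}

/* Old shape: keep draining for as long as fresh interrupts arrive mid-pass. */
static void tasklet_old(void)
{
	do {
		atomic_store(&irq_posted, false);	/* clear before reading */
		drain_once();
	} while (atomic_load(&irq_posted));		/* another irq landed? go again */
}

/* New shape: one pass per invocation; a later irq simply schedules another pass. */
static void tasklet_new(void)
{
	atomic_store(&irq_posted, false);
	drain_once();
}

int main(void)
{
	atomic_store(&irq_posted, true);
	tasklet_old();
	atomic_store(&irq_posted, true);
	tasklet_new();
	return 0;
}

With the loop gone, forward progress depends on the interrupt handler kicking the tasklet for every CS interrupt, which is exactly what the i915_irq.c hunk below guarantees.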
7 changes: 4 additions & 3 deletions drivers/gpu/drm/i915/i915_irq.c
@@ -1497,9 +1497,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 	bool tasklet = false;
 
 	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
-		if (READ_ONCE(engine->execlists.active))
-			tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
-						    &engine->irq_posted);
+		if (READ_ONCE(engine->execlists.active)) {
+			set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+			tasklet = true;
+		}
 	}
 
 	if (iir & GT_RENDER_USER_INTERRUPT) {
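The hunk above swaps test_and_set_bit() for set_bit() plus an unconditional kick. A hedged userspace model of the two policies follows; cs_irq_old(), cs_irq_new(), kick_tasklet() and irq_posted are illustrative stand-ins, not driver symbols.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool irq_posted;		/* stands in for ENGINE_IRQ_EXECLIST */

static void kick_tasklet(void)		/* stands in for scheduling the execlists tasklet */
{
	puts("tasklet scheduled");
}

/* Old policy: only kick on a 0 -> 1 transition of the posted bit, on the
 * assumption that a still-set bit means a kick is already in flight. */
static void cs_irq_old(void)
{
	if (!atomic_exchange(&irq_posted, true))
		kick_tasklet();
}

/* New policy: record the event and always kick. Re-scheduling an already
 * queued tasklet is harmless, and the old assumption stops holding once the
 * bit can also be consumed from the submission path. */
static void cs_irq_new(void)
{
	atomic_store(&irq_posted, true);
	kick_tasklet();
}

int main(void)
{
	cs_irq_old();
	cs_irq_new();
	return 0;
}

The old form treated a still-set bit as proof that a tasklet run was already pending; once the next patch lets the submission path drain CSB events (and clear the bit) outside the tasklet, that small optimisation is no longer a safe signal, so every interrupt now requests a run.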
278 changes: 137 additions & 141 deletions drivers/gpu/drm/i915/intel_lrc.c
@@ -955,166 +955,162 @@ static void process_csb(struct intel_engine_cs *engine)
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
 	struct drm_i915_private *i915 = engine->i915;
+
+	/* The HWSP contains a (cacheable) mirror of the CSB */
+	const u32 *buf =
+		&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+	unsigned int head, tail;
 	bool fw = false;
 
-	do {
-		/* The HWSP contains a (cacheable) mirror of the CSB */
-		const u32 *buf =
-			&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
-		unsigned int head, tail;
-
-		/* Clear before reading to catch new interrupts */
-		clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		smp_mb__after_atomic();
-
-		if (unlikely(execlists->csb_use_mmio)) {
-			if (!fw) {
-				intel_uncore_forcewake_get(i915, execlists->fw_domains);
-				fw = true;
-			}
-
-			buf = (u32 * __force)
-				(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-
-			head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-			tail = GEN8_CSB_WRITE_PTR(head);
-			head = GEN8_CSB_READ_PTR(head);
-			execlists->csb_head = head;
-		} else {
-			const int write_idx =
-				intel_hws_csb_write_index(i915) -
-				I915_HWS_CSB_BUF0_INDEX;
-
-			head = execlists->csb_head;
-			tail = READ_ONCE(buf[write_idx]);
-			rmb(); /* Hopefully paired with a wmb() in HW */
-		}
-		GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-			  engine->name,
-			  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-			  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+	/* Clear before reading to catch new interrupts */
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+	smp_mb__after_atomic();
+
+	if (unlikely(execlists->csb_use_mmio)) {
+		intel_uncore_forcewake_get(i915, execlists->fw_domains);
+		fw = true;
+
+		buf = (u32 * __force)
+			(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+
+		head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+		tail = GEN8_CSB_WRITE_PTR(head);
+		head = GEN8_CSB_READ_PTR(head);
+		execlists->csb_head = head;
+	} else {
+		const int write_idx =
+			intel_hws_csb_write_index(i915) -
+			I915_HWS_CSB_BUF0_INDEX;
+
+		head = execlists->csb_head;
+		tail = READ_ONCE(buf[write_idx]);
+		rmb(); /* Hopefully paired with a wmb() in HW */
+	}
+	GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
+		  engine->name,
+		  head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+		  tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
 
-		while (head != tail) {
-			struct i915_request *rq;
-			unsigned int status;
-			unsigned int count;
-
-			if (++head == GEN8_CSB_ENTRIES)
-				head = 0;
-
-			/*
-			 * We are flying near dragons again.
-			 *
-			 * We hold a reference to the request in execlist_port[]
-			 * but no more than that. We are operating in softirq
-			 * context and so cannot hold any mutex or sleep. That
-			 * prevents us stopping the requests we are processing
-			 * in port[] from being retired simultaneously (the
-			 * breadcrumb will be complete before we see the
-			 * context-switch). As we only hold the reference to the
-			 * request, any pointer chasing underneath the request
-			 * is subject to a potential use-after-free. Thus we
-			 * store all of the bookkeeping within port[] as
-			 * required, and avoid using unguarded pointers beneath
-			 * request itself. The same applies to the atomic
-			 * status notifier.
-			 */
+	while (head != tail) {
+		struct i915_request *rq;
+		unsigned int status;
+		unsigned int count;
+
+		if (++head == GEN8_CSB_ENTRIES)
+			head = 0;
+
+		/*
+		 * We are flying near dragons again.
+		 *
+		 * We hold a reference to the request in execlist_port[]
+		 * but no more than that. We are operating in softirq
+		 * context and so cannot hold any mutex or sleep. That
+		 * prevents us stopping the requests we are processing
+		 * in port[] from being retired simultaneously (the
+		 * breadcrumb will be complete before we see the
+		 * context-switch). As we only hold the reference to the
+		 * request, any pointer chasing underneath the request
+		 * is subject to a potential use-after-free. Thus we
+		 * store all of the bookkeeping within port[] as
+		 * required, and avoid using unguarded pointers beneath
+		 * request itself. The same applies to the atomic
+		 * status notifier.
+		 */
 
-			status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
-			GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
-				  engine->name, head,
-				  status, buf[2*head + 1],
-				  execlists->active);
-
-			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-				      GEN8_CTX_STATUS_PREEMPTED))
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_HWACK);
-			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_HWACK);
-
-			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-				continue;
+		status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
+		GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+			  engine->name, head,
+			  status, buf[2*head + 1],
+			  execlists->active);
+
+		if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+			      GEN8_CTX_STATUS_PREEMPTED))
+			execlists_set_active(execlists,
+					     EXECLISTS_ACTIVE_HWACK);
+		if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+			execlists_clear_active(execlists,
+					       EXECLISTS_ACTIVE_HWACK);
+
+		if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+			continue;
 
-			/* We should never get a COMPLETED | IDLE_ACTIVE! */
-			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
-
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
-				GEM_TRACE("%s preempt-idle\n", engine->name);
-				complete_preempt_context(execlists);
-				continue;
-			}
-
-			if (status & GEN8_CTX_STATUS_PREEMPTED &&
-			    execlists_is_active(execlists,
-						EXECLISTS_ACTIVE_PREEMPT))
-				continue;
-
-			GEM_BUG_ON(!execlists_is_active(execlists,
-							EXECLISTS_ACTIVE_USER));
+		/* We should never get a COMPLETED | IDLE_ACTIVE! */
+		GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+
+		if (status & GEN8_CTX_STATUS_COMPLETE &&
+		    buf[2*head + 1] == execlists->preempt_complete_status) {
+			GEM_TRACE("%s preempt-idle\n", engine->name);
+			complete_preempt_context(execlists);
+			continue;
+		}
+
+		if (status & GEN8_CTX_STATUS_PREEMPTED &&
+		    execlists_is_active(execlists,
+					EXECLISTS_ACTIVE_PREEMPT))
+			continue;
+
+		GEM_BUG_ON(!execlists_is_active(execlists,
+						EXECLISTS_ACTIVE_USER));
 
-			rq = port_unpack(port, &count);
-			GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
-				  engine->name,
-				  port->context_id, count,
-				  rq ? rq->global_seqno : 0,
-				  rq ? rq->fence.context : 0,
-				  rq ? rq->fence.seqno : 0,
-				  intel_engine_get_seqno(engine),
-				  rq ? rq_prio(rq) : 0);
-
-			/* Check the context/desc id for this event matches */
-			GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+		rq = port_unpack(port, &count);
+		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
+			  engine->name,
+			  port->context_id, count,
+			  rq ? rq->global_seqno : 0,
+			  rq ? rq->fence.context : 0,
+			  rq ? rq->fence.seqno : 0,
+			  intel_engine_get_seqno(engine),
+			  rq ? rq_prio(rq) : 0);
+
+		/* Check the context/desc id for this event matches */
+		GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
 
-			GEM_BUG_ON(count == 0);
-			if (--count == 0) {
-				/*
-				 * On the final event corresponding to the
-				 * submission of this context, we expect either
-				 * an element-switch event or a completion
-				 * event (and on completion, the active-idle
-				 * marker). No more preemptions, lite-restore
-				 * or otherwise.
-				 */
-				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-				GEM_BUG_ON(port_isset(&port[1]) &&
-					   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
-				GEM_BUG_ON(!port_isset(&port[1]) &&
-					   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-
-				/*
-				 * We rely on the hardware being strongly
-				 * ordered, that the breadcrumb write is
-				 * coherent (visible from the CPU) before the
-				 * user interrupt and CSB is processed.
-				 */
-				GEM_BUG_ON(!i915_request_completed(rq));
-
-				execlists_context_schedule_out(rq,
-							       INTEL_CONTEXT_SCHEDULE_OUT);
-				i915_request_put(rq);
-
-				GEM_TRACE("%s completed ctx=%d\n",
-					  engine->name, port->context_id);
-
-				port = execlists_port_complete(execlists, port);
-				if (port_isset(port))
-					execlists_user_begin(execlists, port);
-				else
-					execlists_user_end(execlists);
-			} else {
-				port_set(port, port_pack(rq, count));
-			}
-		}
+		GEM_BUG_ON(count == 0);
+		if (--count == 0) {
+			/*
+			 * On the final event corresponding to the
+			 * submission of this context, we expect either
+			 * an element-switch event or a completion
+			 * event (and on completion, the active-idle
+			 * marker). No more preemptions, lite-restore
+			 * or otherwise.
+			 */
+			GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+			GEM_BUG_ON(port_isset(&port[1]) &&
+				   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+			GEM_BUG_ON(!port_isset(&port[1]) &&
+				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+
+			/*
+			 * We rely on the hardware being strongly
+			 * ordered, that the breadcrumb write is
+			 * coherent (visible from the CPU) before the
+			 * user interrupt and CSB is processed.
+			 */
+			GEM_BUG_ON(!i915_request_completed(rq));
+
+			execlists_context_schedule_out(rq,
+						       INTEL_CONTEXT_SCHEDULE_OUT);
+			i915_request_put(rq);
+
+			GEM_TRACE("%s completed ctx=%d\n",
+				  engine->name, port->context_id);
+
+			port = execlists_port_complete(execlists, port);
+			if (port_isset(port))
+				execlists_user_begin(execlists, port);
+			else
+				execlists_user_end(execlists);
+		} else {
+			port_set(port, port_pack(rq, count));
+		}
+	}
 
-		if (head != execlists->csb_head) {
-			execlists->csb_head = head;
-			writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-			       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-		}
-	} while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
+	if (head != execlists->csb_head) {
+		execlists->csb_head = head;
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+		       i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+	}
 
 	if (unlikely(fw))
 		intel_uncore_forcewake_put(i915, execlists->fw_domains);
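For reference, the head/tail bookkeeping that survives the rewrite can be modelled in isolation. This is an illustrative userspace sketch, not driver code: csb_model and drain() are made-up names, and only the six-entry ring size mirrors GEN8_CSB_ENTRIES.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define CSB_ENTRIES 6	/* same size as the hardware ring (GEN8_CSB_ENTRIES) */

struct csb_model {
	uint32_t status[CSB_ENTRIES];	/* one status dword per event slot */
	unsigned int head;		/* last slot the driver consumed   */
	unsigned int tail;		/* last slot the hardware wrote    */
};

/* Consume every event between head and tail exactly once, then remember the
 * new head (process_csb() additionally writes it back through mmio). */
static void drain(struct csb_model *csb)
{
	unsigned int head = csb->head;
	unsigned int tail = csb->tail;

	while (head != tail) {
		if (++head == CSB_ENTRIES)
			head = 0;	/* the ring wraps */

		printf("csb[%u]: status=0x%08" PRIx32 "\n",
		       head, csb->status[head]);
	}

	csb->head = head;
}

int main(void)
{
	/* Arbitrary placeholder values; the real decode is the
	 * GEN8_CTX_STATUS_* tests in the loop above. */
	struct csb_model csb = {
		.status = { 0x0, 0x8000, 0x10, 0x0, 0x0, 0x0 },
		.head = 0,
		.tail = 2,
	};

	drain(&csb);
	return 0;
}

Note that head names the last slot already consumed, so the walk pre-increments (wrapping at the ring size) before reading each event, matching the loop in process_csb() above.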
