Skip to content

Commit

Permalink
drm/i915: Remove obsolete ringbuffer emission for gen8+
Browse files Browse the repository at this point in the history
Since removing the module parameter to force selection of ringbuffer
emission for gen8, the code is defunct. Remove it.

To put the difference into perspective, a couple of microbenchmarks
(bdw i7-5557u, 20170324):
                                        ring          execlists
exec continuous nops on all rings:   1.491us            2.223us
exec sequential nops on each ring:  12.508us           53.682us
single nop + sync:                   9.272us           30.291us

vblank_mode=0 glxgears:            ~11000fps           ~9000fps

Since the earlier submission, gen8 ringbuffer submission has fallen
further and further behind in features. So while ringbuffer may hold the
throughput crown, in terms of interactive latency, execlists is much
better. Alas, we have no convenient metrics for such, other than
demonstrating things we can do with execlists but can not using
legacy ringbuffer submission.

We have made a few improvements to lowlevel execlists throughput,
and ringbuffer currently panics on boot! (bdw i7-5557u, 20171026):

                                        ring          execlists
exec continuous nops on all rings:       n/a            1.921us
exec sequential nops on each ring:       n/a           44.621us
single nop + sync:                       n/a           21.953us

vblank_mode=0 glxgears:                  n/a          ~18500fps

References: https://bugs.freedesktop.org/show_bug.cgi?id=87725
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Once-upon-a-time-Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20171120205504.21892-2-chris@chris-wilson.co.uk
  • Loading branch information
Chris Wilson committed Nov 20, 2017
1 parent fb5c551 commit 79e6770
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 551 deletions.
44 changes: 6 additions & 38 deletions drivers/gpu/drm/i915/i915_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -3254,44 +3254,12 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
return ret;
intel_runtime_pm_get(dev_priv);

if (IS_BROADWELL(dev_priv)) {
struct page *page;
uint64_t *seqno;

page = i915_gem_object_get_page(dev_priv->semaphore->obj, 0);

seqno = (uint64_t *)kmap_atomic(page);
for_each_engine(engine, dev_priv, id) {
uint64_t offset;

seq_printf(m, "%s\n", engine->name);

seq_puts(m, " Last signal:");
for (j = 0; j < num_rings; j++) {
offset = id * I915_NUM_ENGINES + j;
seq_printf(m, "0x%08llx (0x%02llx) ",
seqno[offset], offset * 8);
}
seq_putc(m, '\n');

seq_puts(m, " Last wait: ");
for (j = 0; j < num_rings; j++) {
offset = id + (j * I915_NUM_ENGINES);
seq_printf(m, "0x%08llx (0x%02llx) ",
seqno[offset], offset * 8);
}
seq_putc(m, '\n');

}
kunmap_atomic(seqno);
} else {
seq_puts(m, " Last signal:");
for_each_engine(engine, dev_priv, id)
for (j = 0; j < num_rings; j++)
seq_printf(m, "0x%08x\n",
I915_READ(engine->semaphore.mbox.signal[j]));
seq_putc(m, '\n');
}
seq_puts(m, " Last signal:");
for_each_engine(engine, dev_priv, id)
for (j = 0; j < num_rings; j++)
seq_printf(m, "0x%08x\n",
I915_READ(engine->semaphore.mbox.signal[j]));
seq_putc(m, '\n');

intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev->struct_mutex);
Expand Down
2 changes: 0 additions & 2 deletions drivers/gpu/drm/i915/i915_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -942,7 +942,6 @@ struct i915_gpu_state {
u64 fence[I915_MAX_NUM_FENCES];
struct intel_overlay_error_state *overlay;
struct intel_display_error_state *display;
struct drm_i915_error_object *semaphore;

struct drm_i915_error_engine {
int engine_id;
Expand Down Expand Up @@ -2291,7 +2290,6 @@ struct drm_i915_private {
struct i915_gem_context *kernel_context;
/* Context only to be used for injecting preemption commands */
struct i915_gem_context *preempt_context;
struct i915_vma *semaphore;

struct drm_dma_handle *status_page_dmah;
struct resource mch_res;
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/i915_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -4999,7 +4999,7 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)

bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
{
if (INTEL_INFO(dev_priv)->gen < 6)
if (INTEL_GEN(dev_priv) < 6)
return false;

/* TODO: make semaphores and Execlists play nicely together */
Expand Down
47 changes: 6 additions & 41 deletions drivers/gpu/drm/i915/i915_gem_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -574,29 +574,29 @@ mi_set_context(struct drm_i915_gem_request *req, u32 flags)
enum intel_engine_id id;
const int num_rings =
/* Use an extended w/a on gen7 if signalling from other rings */
(i915_modparams.semaphores && INTEL_GEN(dev_priv) == 7) ?
(i915_modparams.semaphores && IS_GEN7(dev_priv)) ?
INTEL_INFO(dev_priv)->num_rings - 1 :
0;
int len;
u32 *cs;

flags |= MI_MM_SPACE_GTT;
if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
if (IS_HASWELL(dev_priv))
/* These flags are for resource streamer on HSW+ */
flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
else
flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;

len = 4;
if (INTEL_GEN(dev_priv) >= 7)
if (IS_GEN7(dev_priv))
len += 2 + (num_rings ? 4*num_rings + 6 : 0);

cs = intel_ring_begin(req, len);
if (IS_ERR(cs))
return PTR_ERR(cs);

/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
if (INTEL_GEN(dev_priv) >= 7) {
if (IS_GEN7(dev_priv)) {
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
if (num_rings) {
struct intel_engine_cs *signaller;
Expand All @@ -623,7 +623,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 flags)
*/
*cs++ = MI_NOOP;

if (INTEL_GEN(dev_priv) >= 7) {
if (IS_GEN7(dev_priv)) {
if (num_rings) {
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
Expand Down Expand Up @@ -714,27 +714,7 @@ needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine)
if (engine->id != RCS)
return true;

if (INTEL_GEN(engine->i915) < 8)
return true;

return false;
}

static bool
needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
struct i915_gem_context *to,
u32 hw_flags)
{
if (!ppgtt)
return false;

if (!IS_GEN8(to->i915))
return false;

if (hw_flags & MI_RESTORE_INHIBIT)
return true;

return false;
return true;
}

static int do_rcs_switch(struct drm_i915_gem_request *req)
Expand Down Expand Up @@ -784,21 +764,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
engine->legacy_active_context = to;
}

/* GEN8 does *not* require an explicit reload if the PDPs have been
* setup, and we do not wish to move them.
*/
if (needs_pd_load_post(ppgtt, to, hw_flags)) {
trace_switch_mm(engine, to);
ret = ppgtt->switch_mm(ppgtt, req);
/* The hardware context switch is emitted, but we haven't
* actually changed the state - so it's probably safe to bail
* here. Still, let the user know something dangerous has
* happened.
*/
if (ret)
return ret;
}

if (ppgtt)
ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);

Expand Down
36 changes: 0 additions & 36 deletions drivers/gpu/drm/i915/i915_gpu_error.c
Original file line number Diff line number Diff line change
Expand Up @@ -793,8 +793,6 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
"WA batchbuffer", ee->wa_batchbuffer);
}

print_error_obj(m, NULL, "Semaphores", error->semaphore);

if (error->overlay)
intel_overlay_print_error_state(m, error->overlay);

Expand Down Expand Up @@ -903,8 +901,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
kfree(ee->waiters);
}

i915_error_object_free(error->semaphore);

for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
kfree(error->active_bo[i]);
kfree(error->pinned_bo);
Expand Down Expand Up @@ -1116,34 +1112,6 @@ gen8_engine_sync_index(struct intel_engine_cs *engine,
return idx;
}

static void gen8_record_semaphore_state(struct i915_gpu_state *error,
struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
{
struct drm_i915_private *dev_priv = engine->i915;
struct intel_engine_cs *to;
enum intel_engine_id id;

if (!error->semaphore)
return;

for_each_engine(to, dev_priv, id) {
int idx;
u16 signal_offset;
u32 *tmp;

if (engine == to)
continue;

signal_offset =
(GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4;
tmp = error->semaphore->pages[0];
idx = gen8_engine_sync_index(engine, to);

ee->semaphore_mboxes[idx] = tmp[signal_offset];
}
}

static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
{
Expand Down Expand Up @@ -1218,7 +1186,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
if (INTEL_GEN(dev_priv) >= 6) {
ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base));
if (INTEL_GEN(dev_priv) >= 8) {
gen8_record_semaphore_state(error, engine, ee);
ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG);
} else {
gen6_record_semaphore_state(engine, ee);
Expand Down Expand Up @@ -1453,9 +1420,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
struct i915_ggtt *ggtt = &dev_priv->ggtt;
int i;

error->semaphore =
i915_error_object_create(dev_priv, dev_priv->semaphore);

for (i = 0; i < I915_NUM_ENGINES; i++) {
struct intel_engine_cs *engine = dev_priv->engine[i];
struct drm_i915_error_engine *ee = &error->engine[i];
Expand Down
14 changes: 0 additions & 14 deletions drivers/gpu/drm/i915/intel_engine_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@
* Resource Streamer, is 66944 bytes, which rounds to 17 pages.
*/
#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
/* Same as Haswell, but 72064 bytes now. */
#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
Expand Down Expand Up @@ -364,18 +362,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
if (HAS_VEBOX(dev_priv))
I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
}
if (dev_priv->semaphore) {
struct page *page = i915_vma_first_page(dev_priv->semaphore);
void *semaphores;

/* Semaphores are in noncoherent memory, flush to be safe */
semaphores = kmap_atomic(page);
memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
I915_NUM_ENGINES * gen8_semaphore_seqno_size);
kunmap_atomic(semaphores);
}

intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
Expand Down
44 changes: 12 additions & 32 deletions drivers/gpu/drm/i915/intel_hangcheck.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,45 +27,30 @@
static bool
ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr)
{
if (INTEL_GEN(engine->i915) >= 8) {
return (ipehr >> 23) == 0x1c;
} else {
ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
MI_SEMAPHORE_REGISTER);
}
ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE |
MI_SEMAPHORE_REGISTER);
}

static struct intel_engine_cs *
semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr,
u64 offset)
{
struct drm_i915_private *dev_priv = engine->i915;
u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;
struct intel_engine_cs *signaller;
enum intel_engine_id id;

if (INTEL_GEN(dev_priv) >= 8) {
for_each_engine(signaller, dev_priv, id) {
if (engine == signaller)
continue;

if (offset == signaller->semaphore.signal_ggtt[engine->hw_id])
return signaller;
}
} else {
u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK;

for_each_engine(signaller, dev_priv, id) {
if(engine == signaller)
continue;
for_each_engine(signaller, dev_priv, id) {
if (engine == signaller)
continue;

if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
return signaller;
}
if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id])
return signaller;
}

DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x, offset 0x%016llx\n",
engine->name, ipehr, offset);
DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n",
engine->name, ipehr);

return ERR_PTR(-ENODEV);
}
Expand Down Expand Up @@ -135,11 +120,6 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
return NULL;

*seqno = ioread32(vaddr + head + 4) + 1;
if (INTEL_GEN(dev_priv) >= 8) {
offset = ioread32(vaddr + head + 12);
offset <<= 32;
offset |= ioread32(vaddr + head + 8);
}
return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
}

Expand Down Expand Up @@ -273,7 +253,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
return ENGINE_WAIT_KICK;
}

if (INTEL_GEN(dev_priv) >= 6 && tmp & RING_WAIT_SEMAPHORE) {
if (IS_GEN(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) {
switch (semaphore_passed(engine)) {
default:
return ENGINE_DEAD;
Expand Down
Loading

0 comments on commit 79e6770

Please sign in to comment.