drm/i915/gem: Implement legacy MI_STORE_DATA_IMM
The older arches did not convert MI_STORE_DATA_IMM to using the GTT, but
left it writing to a physical address. The notes suggest that the
primary reason was so that the writes were cache coherent, as the
CPU cache uses physical tagging. As such we did not implement the
legacy variant of MI_STORE_DATA_IMM and so left all the relocations
synchronous -- but with a small function to convert from the vma address
into the physical address, we can implement asynchronous relocs on these
older arches, fixing up a few tests that require them.
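
For orientation, the conversion amounts to a page lookup plus the offset
within that page. This is the vma_phys_addr() helper the patch adds
(reproduced from the diff below, with comments):

static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
{
	struct page *page;
	unsigned long addr;

	/* Only valid while the vma is bound to the object's own pages. */
	GEM_BUG_ON(vma->pages != vma->obj->mm.pages);

	/* Look up the backing page for this byte offset... */
	page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT);
	/* ...and convert its page frame number to a physical address. */
	addr = PFN_PHYS(page_to_pfn(page));
	GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */

	return addr + offset_in_page(offset);
}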

In order to be able to test the legacy paths, refactor the gpu
relocations so that we can hook them up to a selftest.

v2: Use an array of offsets not enum labels for the selftest
v3: Refactor the common igt_hexdump()

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/757
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200504140629.28240-1-chris@chris-wilson.co.uk
Chris Wilson committed May 4, 2020
1 parent f5b62bd commit e3d2913
Showing 7 changed files with 336 additions and 135 deletions.
204 changes: 130 additions & 74 deletions drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -955,7 +955,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->node.flags = 0;
 	cache->rq = NULL;
-	cache->rq_size = 0;
+	cache->target = NULL;
 }
 
 static inline void *unmask_page(unsigned long p)
@@ -1325,7 +1325,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 
 	ce = intel_context_create(engine);
 	if (IS_ERR(ce)) {
-		err = PTR_ERR(rq);
+		err = PTR_ERR(ce);
 		goto err_unpin;
 	}
 
@@ -1376,6 +1376,11 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	return err;
 }
 
+static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
+{
+	return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6);
+}
+
 static u32 *reloc_gpu(struct i915_execbuffer *eb,
 		      struct i915_vma *vma,
 		      unsigned int len)
@@ -1387,9 +1392,9 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 	if (unlikely(!cache->rq)) {
 		struct intel_engine_cs *engine = eb->engine;
 
-		if (!intel_engine_can_store_dword(engine)) {
+		if (!reloc_can_use_engine(engine)) {
 			engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
-			if (!engine || !intel_engine_can_store_dword(engine))
+			if (!engine)
 				return ERR_PTR(-ENODEV);
 		}
 
@@ -1435,91 +1440,138 @@ static inline bool use_reloc_gpu(struct i915_vma *vma)
 	return !dma_resv_test_signaled_rcu(vma->resv, true);
 }
 
-static u64
-relocate_entry(struct i915_vma *vma,
-	       const struct drm_i915_gem_relocation_entry *reloc,
-	       struct i915_execbuffer *eb,
-	       const struct i915_vma *target)
-{
-	u64 offset = reloc->offset;
-	u64 target_offset = relocation_target(reloc, target);
-	bool wide = eb->reloc_cache.use_64bit_reloc;
-	void *vaddr;
-
-	if (!eb->reloc_cache.vaddr && use_reloc_gpu(vma)) {
-		const unsigned int gen = eb->reloc_cache.gen;
-		unsigned int len;
-		u32 *batch;
-		u64 addr;
-
-		if (wide)
-			len = offset & 7 ? 8 : 5;
-		else if (gen >= 4)
-			len = 4;
-		else
-			len = 3;
-
-		batch = reloc_gpu(eb, vma, len);
-		if (IS_ERR(batch))
-			goto repeat;
-
-		addr = gen8_canonical_addr(vma->node.start + offset);
-		if (wide) {
-			if (offset & 7) {
-				*batch++ = MI_STORE_DWORD_IMM_GEN4;
-				*batch++ = lower_32_bits(addr);
-				*batch++ = upper_32_bits(addr);
-				*batch++ = lower_32_bits(target_offset);
-
-				addr = gen8_canonical_addr(addr + 4);
-
-				*batch++ = MI_STORE_DWORD_IMM_GEN4;
-				*batch++ = lower_32_bits(addr);
-				*batch++ = upper_32_bits(addr);
-				*batch++ = upper_32_bits(target_offset);
-			} else {
-				*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
-				*batch++ = lower_32_bits(addr);
-				*batch++ = upper_32_bits(addr);
-				*batch++ = lower_32_bits(target_offset);
-				*batch++ = upper_32_bits(target_offset);
-			}
-		} else if (gen >= 6) {
-			*batch++ = MI_STORE_DWORD_IMM_GEN4;
-			*batch++ = 0;
-			*batch++ = addr;
-			*batch++ = target_offset;
-		} else if (gen >= 4) {
-			*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-			*batch++ = 0;
-			*batch++ = addr;
-			*batch++ = target_offset;
-		} else {
-			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
-			*batch++ = addr;
-			*batch++ = target_offset;
-		}
-
-		goto out;
-	}
-
-repeat:
-	vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
-	if (IS_ERR(vaddr))
-		return PTR_ERR(vaddr);
-
-	clflush_write32(vaddr + offset_in_page(offset),
-			lower_32_bits(target_offset),
-			eb->reloc_cache.vaddr);
-
-	if (wide) {
-		offset += sizeof(u32);
-		target_offset >>= 32;
-		wide = false;
-		goto repeat;
-	}
-
-out:
+static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
+{
+	struct page *page;
+	unsigned long addr;
+
+	GEM_BUG_ON(vma->pages != vma->obj->mm.pages);
+
+	page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT);
+	addr = PFN_PHYS(page_to_pfn(page));
+	GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */
+
+	return addr + offset_in_page(offset);
+}
+
+static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+			      struct i915_vma *vma,
+			      u64 offset,
+			      u64 target_addr)
+{
+	const unsigned int gen = eb->reloc_cache.gen;
+	unsigned int len;
+	u32 *batch;
+	u64 addr;
+
+	if (gen >= 8)
+		len = offset & 7 ? 8 : 5;
+	else if (gen >= 4)
+		len = 4;
+	else
+		len = 3;
+
+	batch = reloc_gpu(eb, vma, len);
+	if (IS_ERR(batch))
+		return false;
+
+	addr = gen8_canonical_addr(vma->node.start + offset);
+	if (gen >= 8) {
+		if (offset & 7) {
+			*batch++ = MI_STORE_DWORD_IMM_GEN4;
+			*batch++ = lower_32_bits(addr);
+			*batch++ = upper_32_bits(addr);
+			*batch++ = lower_32_bits(target_addr);
+
+			addr = gen8_canonical_addr(addr + 4);
+
+			*batch++ = MI_STORE_DWORD_IMM_GEN4;
+			*batch++ = lower_32_bits(addr);
+			*batch++ = upper_32_bits(addr);
+			*batch++ = upper_32_bits(target_addr);
+		} else {
+			*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
+			*batch++ = lower_32_bits(addr);
+			*batch++ = upper_32_bits(addr);
+			*batch++ = lower_32_bits(target_addr);
+			*batch++ = upper_32_bits(target_addr);
+		}
+	} else if (gen >= 6) {
+		*batch++ = MI_STORE_DWORD_IMM_GEN4;
+		*batch++ = 0;
+		*batch++ = addr;
+		*batch++ = target_addr;
+	} else if (IS_I965G(eb->i915)) {
+		*batch++ = MI_STORE_DWORD_IMM_GEN4;
+		*batch++ = 0;
+		*batch++ = vma_phys_addr(vma, offset);
+		*batch++ = target_addr;
+	} else if (gen >= 4) {
+		*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+		*batch++ = 0;
+		*batch++ = addr;
+		*batch++ = target_addr;
+	} else if (gen >= 3 &&
+		   !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) {
+		*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+		*batch++ = addr;
+		*batch++ = target_addr;
+	} else {
+		*batch++ = MI_STORE_DWORD_IMM;
+		*batch++ = vma_phys_addr(vma, offset);
+		*batch++ = target_addr;
+	}
+
+	return true;
+}
+
+static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+			    struct i915_vma *vma,
+			    u64 offset,
+			    u64 target_addr)
+{
+	if (eb->reloc_cache.vaddr)
+		return false;
+
+	if (!use_reloc_gpu(vma))
+		return false;
+
+	return __reloc_entry_gpu(eb, vma, offset, target_addr);
+}
+
+static u64
+relocate_entry(struct i915_vma *vma,
+	       const struct drm_i915_gem_relocation_entry *reloc,
+	       struct i915_execbuffer *eb,
+	       const struct i915_vma *target)
+{
+	u64 target_addr = relocation_target(reloc, target);
+	u64 offset = reloc->offset;
+
+	if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+		bool wide = eb->reloc_cache.use_64bit_reloc;
+		void *vaddr;
+
+repeat:
+		vaddr = reloc_vaddr(vma->obj,
+				    &eb->reloc_cache,
+				    offset >> PAGE_SHIFT);
+		if (IS_ERR(vaddr))
+			return PTR_ERR(vaddr);
+
+		GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+		clflush_write32(vaddr + offset_in_page(offset),
+				lower_32_bits(target_addr),
+				eb->reloc_cache.vaddr);
+
+		if (wide) {
+			offset += sizeof(u32);
+			target_addr >>= 32;
+			wide = false;
+			goto repeat;
+		}
+	}
+
 	return target->node.start | UPDATE;
 }

@@ -3022,3 +3074,7 @@ end:;
 	kvfree(exec2_list);
 	return err;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_execbuffer.c"
+#endif
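
The trailing include is the usual i915 pattern for building selftests
into the same translation unit, giving the test access to the static
helpers above. A condensed sketch of the shape such a hook-up takes --
the real test body lives in selftests/i915_gem_execbuffer.c, and the
names here follow the common i915 selftest convention rather than being
quoted from that file:

static int igt_gpu_reloc(void *arg)
{
	/*
	 * Exercise __reloc_entry_gpu() directly: emit GPU stores for an
	 * array of offsets within a scratch object, then read the pages
	 * back and check each dword landed where the relocation said.
	 */
	return 0; /* test body elided in this sketch */
}

int i915_gem_execbuffer_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gpu_reloc),
	};

	return i915_live_subtests(tests, i915);
}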
31 changes: 1 addition & 30 deletions drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -302,35 +302,6 @@ static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val)
 	i915_gem_object_flush_map(t->scratch.vma->obj);
 }
 
-static void hexdump(const void *buf, size_t len)
-{
-	const size_t rowsize = 8 * sizeof(u32);
-	const void *prev = NULL;
-	bool skip = false;
-	size_t pos;
-
-	for (pos = 0; pos < len; pos += rowsize) {
-		char line[128];
-
-		if (prev && !memcmp(prev, buf + pos, rowsize)) {
-			if (!skip) {
-				pr_info("*\n");
-				skip = true;
-			}
-			continue;
-		}
-
-		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
-						rowsize, sizeof(u32),
-						line, sizeof(line),
-						false) >= sizeof(line));
-		pr_info("[%04zx] %s\n", pos, line);
-
-		prev = buf + pos;
-		skip = false;
-	}
-}
-
 static u64 swizzle_bit(unsigned int bit, u64 offset)
 {
 	return (offset & BIT_ULL(bit)) >> (bit - 6);
@@ -426,7 +397,7 @@ static int verify_buffer(const struct tiled_blits *t,
 		pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n",
 		       repr_tiling(buf->tiling),
 		       x, y, buf->start_val);
-		hexdump(vaddr, 4096);
+		igt_hexdump(vaddr, 4096);
 	}
 
 	i915_gem_object_unpin_map(buf->vma->obj);
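
The v3 note in the changelog ("Refactor the common igt_hexdump()")
refers to this removal: the dumper deleted above moves, essentially
verbatim, behind a shared igt_hexdump() that verify_buffer() now calls.
As a sketch, the shared helper is the same logic under the new name
(its exact home among the selftest utilities is assumed here):

void igt_hexdump(const void *buf, size_t len)
{
	const size_t rowsize = 8 * sizeof(u32);
	const void *prev = NULL;
	bool skip = false;
	size_t pos;

	for (pos = 0; pos < len; pos += rowsize) {
		char line[128];

		/* Collapse runs of identical rows into a single "*". */
		if (prev && !memcmp(prev, buf + pos, rowsize)) {
			if (!skip) {
				pr_info("*\n");
				skip = true;
			}
			continue;
		}

		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
						rowsize, sizeof(u32),
						line, sizeof(line),
						false) >= sizeof(line));
		pr_info("[%04zx] %s\n", pos, line);

		prev = buf + pos;
		skip = false;
	}
}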