drm/i915/cmdparser: Accelerate copies from WC memory
If we need to use clflush to prepare our batch for reads from memory, we
can bypass the cache instead by using non-temporal copies.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-39-chris@chris-wilson.co.uk
Chris Wilson committed Aug 18, 2016
1 parent 76ff480 commit 52a42ce
Showing 1 changed file with 43 additions and 27 deletions.
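Aside: the non-temporal path below relies on SSE4.1 MOVNTDQA loads, which stream data out of write-combined (WC) memory through the streaming-load buffer instead of filling the CPU cache. The kernel's actual helper is i915_memcpy_from_wc() in i915_memcpy.c (which unrolls the copy several registers at a time inside kernel_fpu_begin()/kernel_fpu_end()); the following is only a minimal userspace sketch of the same idea, assuming SSE4.1 (build with -msse4.1) and 16-byte-aligned arguments:

#include <immintrin.h>	/* _mm_stream_load_si128 (SSE4.1) */
#include <stddef.h>

/* Illustrative only: a userspace analogue of a non-temporal copy from
 * WC memory. dst, src and len are assumed 16-byte aligned, mirroring
 * the alignment contract of the real i915 helper. */
static void copy_from_wc(void *dst, const void *src, size_t len)
{
	__m128i *d = dst;
	const __m128i *s = src;

	while (len >= 16) {
		/* MOVNTDQA: streaming load that pulls data from WC
		 * memory via the streaming-load buffer, bypassing the
		 * cache hierarchy. */
		__m128i tmp = _mm_stream_load_si128((__m128i *)s);
		/* Ordinary aligned store into the cacheable destination. */
		_mm_store_si128(d, tmp);
		d++, s++;
		len -= 16;
	}
}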
drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -965,8 +965,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 {
 	unsigned int src_needs_clflush;
 	unsigned int dst_needs_clflush;
-	void *dst, *ptr;
-	int offset, n;
+	void *dst, *src;
 	int ret;
 
 	ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
@@ -983,31 +982,48 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	if (IS_ERR(dst))
 		goto unpin_dst;
 
-	ptr = dst;
-	offset = offset_in_page(batch_start_offset);
-
-	/* We can avoid clflushing partial cachelines before the write if we
-	 * only every write full cache-lines. Since we know that both the
-	 * source and destination are in multiples of PAGE_SIZE, we can simply
-	 * round up to the next cacheline. We don't care about copying too much
-	 * here as we only validate up to the end of the batch.
-	 */
-	if (dst_needs_clflush & CLFLUSH_BEFORE)
-		batch_len = roundup(batch_len, boot_cpu_data.x86_clflush_size);
-
-	for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
-		int len = min_t(int, batch_len, PAGE_SIZE - offset);
-		void *vaddr;
-
-		vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n));
-		if (src_needs_clflush)
-			drm_clflush_virt_range(vaddr + offset, len);
-		memcpy(ptr, vaddr + offset, len);
-		kunmap_atomic(vaddr);
-
-		ptr += len;
-		batch_len -= len;
-		offset = 0;
-	}
+	src = ERR_PTR(-ENODEV);
+	if (src_needs_clflush &&
+	    i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) {
+		src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
+		if (!IS_ERR(src)) {
+			i915_memcpy_from_wc(dst,
+					    src + batch_start_offset,
+					    ALIGN(batch_len, 16));
+			i915_gem_object_unpin_map(src_obj);
+		}
+	}
+	if (IS_ERR(src)) {
+		void *ptr;
+		int offset, n;
+
+		offset = offset_in_page(batch_start_offset);
+
+		/* We can avoid clflushing partial cachelines before the write
+		 * if we only every write full cache-lines. Since we know that
+		 * both the source and destination are in multiples of
+		 * PAGE_SIZE, we can simply round up to the next cacheline.
+		 * We don't care about copying too much here as we only
+		 * validate up to the end of the batch.
+		 */
+		if (dst_needs_clflush & CLFLUSH_BEFORE)
+			batch_len = roundup(batch_len,
+					    boot_cpu_data.x86_clflush_size);
+
+		ptr = dst;
+		for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
+			int len = min_t(int, batch_len, PAGE_SIZE - offset);
+
+			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+			if (src_needs_clflush)
+				drm_clflush_virt_range(src + offset, len);
+			memcpy(ptr, src + offset, len);
+			kunmap_atomic(src);
+
+			ptr += len;
+			batch_len -= len;
+			offset = 0;
+		}
+	}
 
 	/* dst_obj is returned with vmap pinned */
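A note on the odd-looking guard in the new fast path: i915_memcpy_from_wc() returns false when MOVNTDQA is unavailable or when any of its arguments is not 16-byte aligned, so calling it with len == 0 acts as a pure capability-and-alignment probe before paying for the WC vmap. Only batch_start_offset needs testing here, since the vmap returned by i915_gem_object_pin_map() is page-aligned and the length is handled by ALIGN(batch_len, 16) at the call site. A condensed sketch of that idiom (the prototype matches the i915 helper; the wrapper name is made up):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Prototype as in the i915 driver; returns false if SSE4.1 is missing
 * or dst/src/len are not all 16-byte aligned. */
bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);

/* Hypothetical wrapper illustrating the len == 0 probe idiom. */
static bool batch_offset_streamable(uint32_t batch_start_offset)
{
	/* The offset is smuggled in as a fake pointer purely so its low
	 * bits get the same 16-byte alignment test as a real pointer;
	 * with len == 0 no copy is attempted. */
	return i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset,
				   NULL, 0);
}

If the probe fails, the original per-page kmap_atomic()/clflush/memcpy loop remains as the fallback, with batch_len rounded up to a whole cacheline so that the CLFLUSH_BEFORE case never has to flush a partially written line.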
