drm/i915: Extract i915_gem_obj_prepare_shmem_write()
This is a companion to i915_gem_obj_prepare_shmem_read() that prepares
the backing storage for direct writes. It first serialises with the GPU,
pins the backing storage and then indicates what clflushes are required in
order for the writes to be coherent.

Whilst here, fix support for ancient CPUs without clflush for which we
cannot do the GTT+clflush tricks.

v2: Add i915_gem_obj_finish_shmem_access() for symmetry

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-8-chris@chris-wilson.co.uk
Chris Wilson committed Aug 18, 2016
1 parent 31a3920 commit 43394c7
Showing 3 changed files with 102 additions and 65 deletions.
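Before the diff, a minimal sketch (not part of the patch) of how a caller is expected to use the new API: prepare serialises with the GPU and pins the backing pages, the returned flags say which clflushes are needed around the access, and finish drops the pin. The helper name example_cpu_write() and its exact shape are illustrative assumptions, not code from this commit.

/* Illustrative only: pair prepare/finish around a direct CPU write
 * to one page of the object's shmem backing storage.
 */
static int example_cpu_write(struct drm_i915_gem_object *obj,
			     struct page *page, const void *src, size_t len)
{
	unsigned int needs_clflush;
	void *vaddr;
	int ret;

	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	if (ret)
		return ret; /* e.g. -ENODEV if the object has no struct pages */

	vaddr = kmap(page);
	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(vaddr, len); /* drop stale cachelines */
	memcpy(vaddr, src, len);
	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(vaddr, len); /* make the write coherent */
	kunmap(page);

	i915_gem_obj_finish_shmem_access(obj); /* unpin the backing storage */
	return 0;
}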
drivers/gpu/drm/i915/i915_cmd_parser.c (2 additions, 2 deletions)
@@ -973,7 +973,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 		       u32 batch_start_offset,
 		       u32 batch_len)
 {
-	int needs_clflush = 0;
+	unsigned int needs_clflush;
 	void *src_base, *src;
 	void *dst = NULL;
 	int ret;
@@ -1020,7 +1020,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 unmap_src:
 	vunmap(src_base);
 unpin_src:
-	i915_gem_object_unpin_pages(src_obj);
+	i915_gem_obj_finish_shmem_access(src_obj);
 
 	return ret ? ERR_PTR(ret) : dst;
 }
drivers/gpu/drm/i915/i915_drv.h (14 additions, 3 deletions)
@@ -3098,9 +3098,6 @@ int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    int *needs_clflush);
-
 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
 static inline int __sg_page_count(struct scatterlist *sg)
@@ -3201,6 +3198,20 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 }
 
+int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
+				    unsigned int *needs_clflush);
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+				     unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE 0x1
+#define CLFLUSH_AFTER 0x2
+#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
+
+static inline void
+i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 			 struct drm_i915_gem_request *to);
drivers/gpu/drm/i915/i915_gem.c (86 additions, 60 deletions)
@@ -609,35 +609,95 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
  * flush the object from the CPU cache.
  */
 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    int *needs_clflush)
+				    unsigned int *needs_clflush)
 {
 	int ret;
 
 	*needs_clflush = 0;
 
-	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
-		return -EINVAL;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
 
 	ret = i915_gem_object_wait_rendering(obj, true);
 	if (ret)
 		return ret;
 
-	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
-		/* If we're not in the cpu read domain, set ourself into the gtt
-		 * read domain and manually flush cachelines (if required). This
-		 * optimizes for the case when the gpu will dirty the data
-		 * anyway again before the next pread happens. */
+	/* If we're not in the cpu read domain, set ourself into the gtt
+	 * read domain and manually flush cachelines (if required). This
+	 * optimizes for the case when the gpu will dirty the data
+	 * anyway again before the next pread happens.
+	 */
+	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
 		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
 							obj->cache_level);
-	}
 
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
 		return ret;
 
 	i915_gem_object_pin_pages(obj);
 
-	return ret;
+	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, false);
+		if (ret) {
+			i915_gem_object_unpin_pages(obj);
+			return ret;
+		}
+		*needs_clflush = 0;
+	}
+
+	return 0;
+}
+
+int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
+				     unsigned int *needs_clflush)
+{
+	int ret;
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_wait_rendering(obj, false);
+	if (ret)
+		return ret;
+
+	/* If we're not in the cpu write domain, set ourself into the
+	 * gtt write domain and manually flush cachelines (as required).
+	 * This optimizes for the case when the gpu will use the data
+	 * right away and we therefore have to clflush anyway.
+	 */
+	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+		*needs_clflush |= cpu_write_needs_clflush(obj) << 1;
+
+	/* Same trick applies to invalidate partially written cachelines read
+	 * before writing.
+	 */
+	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+		*needs_clflush |= !cpu_cache_is_coherent(obj->base.dev,
+							 obj->cache_level);
+
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_pin_pages(obj);
+
+	if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+		if (ret) {
+			i915_gem_object_unpin_pages(obj);
+			return ret;
+		}
+		*needs_clflush = 0;
+	}
+
+	if ((*needs_clflush & CLFLUSH_AFTER) == 0)
+		obj->cache_dirty = true;
+
+	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	obj->dirty = 1;
+	return 0;
 }
 
 /* Per-page copy function for the shmem pread fastpath.
@@ -872,19 +932,14 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	int needs_clflush = 0;
 	struct sg_page_iter sg_iter;
 
-	if (!i915_gem_object_has_struct_page(obj))
-		return -ENODEV;
-
-	user_data = u64_to_user_ptr(args->data_ptr);
-	remain = args->size;
-
-	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
 	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
 	if (ret)
 		return ret;
 
+	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = args->offset;
+	remain = args->size;
 
 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
 			 offset >> PAGE_SHIFT) {
@@ -940,7 +995,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	}
 
 out:
-	i915_gem_object_unpin_pages(obj);
+	i915_gem_obj_finish_shmem_access(obj);
 
 	return ret;
 }
@@ -1248,42 +1303,17 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	int shmem_page_offset, page_length, ret = 0;
 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 	int hit_slowpath = 0;
-	int needs_clflush_after = 0;
-	int needs_clflush_before = 0;
+	unsigned int needs_clflush;
 	struct sg_page_iter sg_iter;
 
-	user_data = u64_to_user_ptr(args->data_ptr);
-	remain = args->size;
-
-	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-
-	ret = i915_gem_object_wait_rendering(obj, false);
+	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
 	if (ret)
 		return ret;
 
-	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-		/* If we're not in the cpu write domain, set ourself into the gtt
-		 * write domain and manually flush cachelines (if required). This
-		 * optimizes for the case when the gpu will use the data
-		 * right away and we therefore have to clflush anyway. */
-		needs_clflush_after = cpu_write_needs_clflush(obj);
-	}
-	/* Same trick applies to invalidate partially written cachelines read
-	 * before writing. */
-	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
-		needs_clflush_before =
-			!cpu_cache_is_coherent(dev, obj->cache_level);
-
-	ret = i915_gem_object_get_pages(obj);
-	if (ret)
-		return ret;
-
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-
-	i915_gem_object_pin_pages(obj);
-
+	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
+	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = args->offset;
-	obj->dirty = 1;
+	remain = args->size;
 
 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
 			 offset >> PAGE_SHIFT) {
@@ -1307,7 +1337,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		/* If we don't overwrite a cacheline completely we need to be
 		 * careful to have up-to-date data by first clflushing. Don't
 		 * overcomplicate things and flush the entire patch. */
-		partial_cacheline_write = needs_clflush_before &&
+		partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
 			((shmem_page_offset | page_length)
 				& (boot_cpu_data.x86_clflush_size - 1));
@@ -1317,7 +1347,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
 					user_data, page_do_bit17_swizzling,
 					partial_cacheline_write,
-					needs_clflush_after);
+					needs_clflush & CLFLUSH_AFTER);
 		if (ret == 0)
 			goto next_page;
 
@@ -1326,7 +1356,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
 					user_data, page_do_bit17_swizzling,
 					partial_cacheline_write,
-					needs_clflush_after);
+					needs_clflush & CLFLUSH_AFTER);
 
 		mutex_lock(&dev->struct_mutex);
 
@@ -1340,25 +1370,23 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	}
 
 out:
-	i915_gem_object_unpin_pages(obj);
+	i915_gem_obj_finish_shmem_access(obj);
 
 	if (hit_slowpath) {
 		/*
 		 * Fixup: Flush cpu caches in case we didn't flush the dirty
 		 * cachelines in-line while writing and the object moved
 		 * out of the cpu write domain while we've dropped the lock.
 		 */
-		if (!needs_clflush_after &&
+		if (!(needs_clflush & CLFLUSH_AFTER) &&
 		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
 			if (i915_gem_clflush_object(obj, obj->pin_display))
-				needs_clflush_after = true;
+				needs_clflush |= CLFLUSH_AFTER;
 		}
 	}
 
-	if (needs_clflush_after)
+	if (needs_clflush & CLFLUSH_AFTER)
 		i915_gem_chipset_flush(to_i915(dev));
-	else
-		obj->cache_dirty = true;
 
 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
 	return ret;
@@ -1437,10 +1465,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (ret == -EFAULT || ret == -ENOSPC) {
 		if (obj->phys_handle)
 			ret = i915_gem_phys_pwrite(obj, args, file);
-		else if (i915_gem_object_has_struct_page(obj))
-			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 		else
-			ret = -ENODEV;
+			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
 	}
 
 	i915_gem_object_put(obj);
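A note on the partial_cacheline_write test in the pwrite hunks: ORing the page offset with the length and masking against (clflush_size - 1) detects an unaligned start or an unaligned length in a single operation, so the flush-before-write path only triggers when some cacheline would be partially overwritten. A standalone, userspace sketch of the same bit trick, assuming a 64-byte cacheline rather than reading boot_cpu_data.x86_clflush_size:

#include <stdbool.h>
#include <stdio.h>

#define CLFLUSH_SIZE 64 /* assumed; the kernel queries boot_cpu_data */

/* True if [offset, offset + length) does not cover whole cachelines. */
static bool partial_cacheline_write(unsigned int offset, unsigned int length)
{
	return ((offset | length) & (CLFLUSH_SIZE - 1)) != 0;
}

int main(void)
{
	printf("%d\n", partial_cacheline_write(0, 4096));  /* 0: whole lines only */
	printf("%d\n", partial_cacheline_write(32, 4096)); /* 1: unaligned start */
	printf("%d\n", partial_cacheline_write(0, 100));   /* 1: partial tail line */
	return 0;
}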
