From 6bf29d02c0906aef2a4e261a34bcd28ba1725ac1 Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Sun, 25 Mar 2012 19:47:37 +0200
Subject: [PATCH]

--- yaml ---
r: 306997
b: refs/heads/master
c: 586428852a4fe64d77dc3e34c446fba33a2ca971
h: refs/heads/master
i:
  306995: 5fe84fce5211506e46880c1d66bdbcc54c1e3d96
v: v3
---
 [refs]                                |  2 +-
 trunk/drivers/gpu/drm/i915/i915_gem.c | 46 ++++++++++++++++++++++++---
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/[refs] b/[refs]
index 7a5ef8e16277..60aabb2501b9 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 96d79b52701758404cf8701986891afc99ce810b
+refs/heads/master: 586428852a4fe64d77dc3e34c446fba33a2ca971
diff --git a/trunk/drivers/gpu/drm/i915/i915_gem.c b/trunk/drivers/gpu/drm/i915/i915_gem.c
index 292a74f2fa87..83dfb4407c8f 100644
--- a/trunk/drivers/gpu/drm/i915/i915_gem.c
+++ b/trunk/drivers/gpu/drm/i915/i915_gem.c
@@ -570,23 +570,39 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	int shmem_page_offset, page_length, ret = 0;
 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
 	int hit_slowpath = 0;
+	int needs_clflush_after = 0;
+	int needs_clflush_before = 0;
 	int release_page;
 
-	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
-	if (ret)
-		return ret;
-
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
 
 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 
+	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+		/* If we're not in the cpu write domain, set ourself into the gtt
+		 * write domain and manually flush cachelines (if required). This
+		 * optimizes for the case when the gpu will use the data
+		 * right away and we therefore have to clflush anyway. */
+		if (obj->cache_level == I915_CACHE_NONE)
+			needs_clflush_after = 1;
+		ret = i915_gem_object_set_to_gtt_domain(obj, true);
+		if (ret)
+			return ret;
+	}
+	/* Same trick applies for invalidate partially written cachelines before
+	 * writing. */
+	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
+	    && obj->cache_level == I915_CACHE_NONE)
+		needs_clflush_before = 1;
+
 	offset = args->offset;
 	obj->dirty = 1;
 
 	while (remain > 0) {
 		struct page *page;
 		char *vaddr;
+		int partial_cacheline_write;
 
 		/* Operation in this page
 		 *
@@ -599,6 +615,13 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
 			page_length = PAGE_SIZE - shmem_page_offset;
 
+		/* If we don't overwrite a cacheline completely we need to be
+		 * careful to have up-to-date data by first clflushing. Don't
+		 * overcomplicate things and flush the entire patch. */
+		partial_cacheline_write = needs_clflush_before &&
+			((shmem_page_offset | page_length)
+				& (boot_cpu_data.x86_clflush_size - 1));
+
 		if (obj->pages) {
 			page = obj->pages[offset >> PAGE_SHIFT];
 			release_page = 0;
@@ -616,9 +639,15 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 
 		if (!page_do_bit17_swizzling) {
 			vaddr = kmap_atomic(page);
+			if (partial_cacheline_write)
+				drm_clflush_virt_range(vaddr + shmem_page_offset,
+						       page_length);
 			ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
 							user_data,
 							page_length);
+			if (needs_clflush_after)
+				drm_clflush_virt_range(vaddr + shmem_page_offset,
+						       page_length);
 			kunmap_atomic(vaddr);
 
 			if (ret == 0)
@@ -630,6 +659,9 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		mutex_unlock(&dev->struct_mutex);
 
 		vaddr = kmap(page);
+		if (partial_cacheline_write)
+			drm_clflush_virt_range(vaddr + shmem_page_offset,
+					       page_length);
 		if (page_do_bit17_swizzling)
 			ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
 							user_data,
@@ -638,6 +670,9 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 			ret = __copy_from_user(vaddr + shmem_page_offset,
 					       user_data,
 					       page_length);
+		if (needs_clflush_after)
+			drm_clflush_virt_range(vaddr + shmem_page_offset,
+					       page_length);
 		kunmap(page);
 
 		mutex_lock(&dev->struct_mutex);
@@ -671,6 +706,9 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		}
 	}
 
+	if (needs_clflush_after)
+		intel_gtt_chipset_flush();
+
 	return ret;
 }
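
Note (illustration only, not part of the patch to apply): the partial_cacheline_write test added above can be exercised in isolation. In the sketch below, CLFLUSH_SIZE and is_partial_cacheline_write are made-up stand-ins for boot_cpu_data.x86_clflush_size and the inline expression in i915_gem_shmem_pwrite, assuming a 64-byte clflush granularity.

/*
 * Minimal userspace sketch (assumption: 64-byte cachelines) of the check
 * the patch uses to decide whether a pwrite only partially covers a
 * cacheline and therefore needs a clflush before the copy.
 */
#include <stdio.h>

#define CLFLUSH_SIZE 64	/* stand-in for boot_cpu_data.x86_clflush_size */

static int is_partial_cacheline_write(unsigned int offset, unsigned int len)
{
	/* Non-zero iff the start offset or the length is not a multiple of
	 * the clflush size, i.e. the first and/or last cacheline is only
	 * partially overwritten and a stale dirty line could otherwise
	 * clobber the untouched bytes. */
	return (offset | len) & (CLFLUSH_SIZE - 1);
}

int main(void)
{
	printf("%d\n", is_partial_cacheline_write(0, 4096) != 0);	/* 0: whole cachelines */
	printf("%d\n", is_partial_cacheline_write(32, 4096) != 0);	/* 1: unaligned start */
	printf("%d\n", is_partial_cacheline_write(0, 100) != 0);	/* 1: partial tail */
	return 0;
}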