diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4cef9e9e05c58..8d0df7d81d8b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -39,6 +39,8 @@
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
 
+#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
+
 #define  __EXEC_OBJECT_HAS_PIN		(1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE	(1<<30)
 #define  __EXEC_OBJECT_NEEDS_MAP	(1<<29)
@@ -59,6 +61,7 @@ struct i915_execbuffer_params {
 };
 
 struct eb_vmas {
+	struct drm_i915_private *i915;
 	struct list_head vmas;
 	int and;
 	union {
@@ -68,7 +71,8 @@ struct eb_vmas {
 };
 
 static struct eb_vmas *
-eb_create(struct drm_i915_gem_execbuffer2 *args)
+eb_create(struct drm_i915_private *i915,
+	  struct drm_i915_gem_execbuffer2 *args)
 {
 	struct eb_vmas *eb = NULL;
 
@@ -95,6 +99,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args)
 	} else
 		eb->and = -args->buffer_count;
 
+	eb->i915 = i915;
 	INIT_LIST_HEAD(&eb->vmas);
 	return eb;
 }
@@ -278,6 +283,9 @@ static void eb_destroy(struct eb_vmas *eb)
 
 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
+	if (DBG_USE_CPU_RELOC)
+		return DBG_USE_CPU_RELOC > 0;
+
 	return (HAS_LLC(obj->base.dev) ||
 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
 		obj->cache_level != I915_CACHE_NONE);
@@ -302,37 +310,58 @@ static inline uint64_t gen8_noncanonical_addr(uint64_t address)
 }
 
 static inline uint64_t
-relocation_target(struct drm_i915_gem_relocation_entry *reloc,
+relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
 		  uint64_t target_offset)
 {
 	return gen8_canonical_addr((int)reloc->delta + target_offset);
 }
 
 struct reloc_cache {
-	void *vaddr;
+	struct drm_i915_private *i915;
+	struct drm_mm_node node;
+	unsigned long vaddr;
 	unsigned int page;
-	enum { KMAP, IOMAP } type;
+	bool use_64bit_reloc;
 };
 
-static void reloc_cache_init(struct reloc_cache *cache)
+static void reloc_cache_init(struct reloc_cache *cache,
+			     struct drm_i915_private *i915)
 {
 	cache->page = -1;
-	cache->vaddr = NULL;
+	cache->vaddr = 0;
+	cache->i915 = i915;
+	cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8;
+}
+
+static inline void *unmask_page(unsigned long p)
+{
+	return (void *)(uintptr_t)(p & PAGE_MASK);
+}
+
+static inline unsigned int unmask_flags(unsigned long p)
+{
+	return p & ~PAGE_MASK;
 }
 
+#define KMAP 0x4 /* after CLFLUSH_FLAGS */
+
 static void reloc_cache_fini(struct reloc_cache *cache)
 {
+	void *vaddr;
+
 	if (!cache->vaddr)
 		return;
 
-	switch (cache->type) {
-	case KMAP:
-		kunmap_atomic(cache->vaddr);
-		break;
+	vaddr = unmask_page(cache->vaddr);
+	if (cache->vaddr & KMAP) {
+		if (cache->vaddr & CLFLUSH_AFTER)
+			mb();
 
-	case IOMAP:
-		io_mapping_unmap_atomic(cache->vaddr);
-		break;
+		kunmap_atomic(vaddr);
+		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
+	} else {
+		io_mapping_unmap_atomic((void __iomem *)vaddr);
+		i915_vma_unpin((struct i915_vma *)cache->node.mm);
 	}
 }
 
@@ -340,147 +369,142 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 			struct reloc_cache *cache,
 			int page)
 {
-	if (cache->page == page)
-		return cache->vaddr;
-
-	if (cache->vaddr)
-		kunmap_atomic(cache->vaddr);
-
-	cache->page = page;
-	cache->vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
-	cache->type = KMAP;
-
-	return cache->vaddr;
-}
+	void *vaddr;
 
-static int
-relocate_entry_cpu(struct drm_i915_gem_object *obj,
-		   struct drm_i915_gem_relocation_entry *reloc,
-		   struct reloc_cache *cache,
-		   uint64_t target_offset)
-{
-	struct drm_device *dev = obj->base.dev;
-	uint32_t page_offset = offset_in_page(reloc->offset);
-	uint64_t delta = relocation_target(reloc, target_offset);
-	char *vaddr;
-	int ret;
+	if (cache->vaddr) {
+		kunmap_atomic(unmask_page(cache->vaddr));
+	} else {
+		unsigned int flushes;
+		int ret;
 
-	ret = i915_gem_object_set_to_cpu_domain(obj, true);
-	if (ret)
-		return ret;
+		ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);
+		if (ret)
+			return ERR_PTR(ret);
 
-	vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
-	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
+		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
+		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
 
-	if (INTEL_GEN(dev) >= 8) {
-		page_offset += sizeof(uint32_t);
-		if (page_offset == PAGE_SIZE) {
-			vaddr = reloc_kmap(obj, cache, cache->page + 1);
-			page_offset = 0;
-		}
-		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
+		cache->vaddr = flushes | KMAP;
+		cache->node.mm = (void *)obj;
+		if (flushes)
+			mb();
 	}
 
-	return 0;
+	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
+	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
+	cache->page = page;
+
+	return vaddr;
 }
 
-static void *reloc_iomap(struct drm_i915_private *i915,
+static void *reloc_iomap(struct drm_i915_gem_object *obj,
 			 struct reloc_cache *cache,
-			 uint64_t offset)
+			 int page)
 {
-	if (cache->page == offset >> PAGE_SHIFT)
-		return cache->vaddr;
+	void *vaddr;
+
+	if (cache->vaddr) {
+		io_mapping_unmap_atomic(unmask_page(cache->vaddr));
+	} else {
+		struct i915_vma *vma;
+		int ret;
 
-	if (cache->vaddr)
-		io_mapping_unmap_atomic(cache->vaddr);
+		if (use_cpu_reloc(obj))
+			return NULL;
 
-	cache->page = offset >> PAGE_SHIFT;
-	cache->vaddr =
-		io_mapping_map_atomic_wc(i915->ggtt.mappable,
-					 offset & PAGE_MASK);
-	cache->type = IOMAP;
+		ret = i915_gem_object_set_to_gtt_domain(obj, true);
+		if (ret)
+			return ERR_PTR(ret);
 
-	return cache->vaddr;
-}
+		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+					       PIN_MAPPABLE | PIN_NONBLOCK);
+		if (IS_ERR(vma))
+			return NULL;
 
-static int
-relocate_entry_gtt(struct drm_i915_gem_object *obj,
-		   struct drm_i915_gem_relocation_entry *reloc,
-		   struct reloc_cache *cache,
-		   uint64_t target_offset)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_vma *vma;
-	uint64_t delta = relocation_target(reloc, target_offset);
-	uint64_t offset;
-	void __iomem *reloc_page;
-	int ret;
+		ret = i915_gem_object_put_fence(obj);
+		if (ret) {
+			i915_vma_unpin(vma);
+			return ERR_PTR(ret);
+		}
 
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+		cache->node.start = vma->node.start;
+		cache->node.mm = (void *)vma;
+	}
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret)
-		goto unpin;
+	vaddr = io_mapping_map_atomic_wc(cache->i915->ggtt.mappable,
					 cache->node.start + (page << PAGE_SHIFT));
+	cache->page = page;
+	cache->vaddr = (unsigned long)vaddr;
 
-	ret = i915_gem_object_put_fence(obj);
-	if (ret)
-		goto unpin;
+	return vaddr;
+}
 
-	/* Map the page containing the relocation we're going to perform.  */
-	offset = vma->node.start + reloc->offset;
-	reloc_page = reloc_iomap(dev_priv, cache, offset);
-	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
+static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+			 struct reloc_cache *cache,
+			 int page)
+{
+	void *vaddr;
 
-	if (INTEL_GEN(dev_priv) >= 8) {
-		offset += sizeof(uint32_t);
-		if (offset_in_page(offset) == 0)
-			reloc_page = reloc_iomap(dev_priv, cache, offset);
-		iowrite32(upper_32_bits(delta),
-			  reloc_page + offset_in_page(offset));
+	if (cache->page == page) {
+		vaddr = unmask_page(cache->vaddr);
+	} else {
+		vaddr = NULL;
+		if ((cache->vaddr & KMAP) == 0)
+			vaddr = reloc_iomap(obj, cache, page);
+		if (!vaddr)
+			vaddr = reloc_kmap(obj, cache, page);
 	}
 
-unpin:
-	__i915_vma_unpin(vma);
-	return ret;
+	return vaddr;
 }
 
-static void
-clflush_write32(void *addr, uint32_t value)
+static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 {
-	/* This is not a fast path, so KISS. */
-	drm_clflush_virt_range(addr, sizeof(uint32_t));
-	*(uint32_t *)addr = value;
-	drm_clflush_virt_range(addr, sizeof(uint32_t));
+	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
+		if (flushes & CLFLUSH_BEFORE) {
+			clflushopt(addr);
+			mb();
+		}
+
+		*addr = value;
+
+		/* Writes to the same cacheline are serialised by the CPU
+		 * (including clflush). On the write path, we only require
+		 * that it hits memory in an orderly fashion and place
+		 * mb barriers at the start and end of the relocation phase
+		 * to ensure ordering of clflush wrt to the system.
+		 */
+		if (flushes & CLFLUSH_AFTER)
+			clflushopt(addr);
+	} else
+		*addr = value;
 }
 
 static int
-relocate_entry_clflush(struct drm_i915_gem_object *obj,
-		       struct drm_i915_gem_relocation_entry *reloc,
-		       struct reloc_cache *cache,
-		       uint64_t target_offset)
+relocate_entry(struct drm_i915_gem_object *obj,
+	       const struct drm_i915_gem_relocation_entry *reloc,
+	       struct reloc_cache *cache,
+	       u64 target_offset)
 {
-	struct drm_device *dev = obj->base.dev;
-	uint32_t page_offset = offset_in_page(reloc->offset);
-	uint64_t delta = relocation_target(reloc, target_offset);
-	char *vaddr;
-	int ret;
+	u64 offset = reloc->offset;
+	bool wide = cache->use_64bit_reloc;
+	void *vaddr;
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret)
-		return ret;
+	target_offset = relocation_target(reloc, target_offset);
+repeat:
+	vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT);
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
 
-	vaddr = reloc_kmap(obj, cache, reloc->offset >> PAGE_SHIFT);
-	clflush_write32(vaddr + page_offset, lower_32_bits(delta));
+	clflush_write32(vaddr + offset_in_page(offset),
+			lower_32_bits(target_offset),
+			cache->vaddr);
 
-	if (INTEL_GEN(dev) >= 8) {
-		page_offset += sizeof(uint32_t);
-		if (page_offset == PAGE_SIZE) {
-			vaddr = reloc_kmap(obj, cache, cache->page + 1);
-			page_offset = 0;
-		}
-		clflush_write32(vaddr + page_offset, upper_32_bits(delta));
+	if (wide) {
+		offset += sizeof(u32);
+		target_offset >>= 32;
+		wide = false;
+		goto repeat;
 	}
 
 	return 0;
@@ -567,7 +591,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 
 	/* Check that the relocation address is valid... */
 	if (unlikely(reloc->offset >
-		     obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
+		     obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) {
 		DRM_DEBUG("Relocation beyond object bounds: "
 			  "obj %p target %d offset %d size %d.\n",
 			  obj, reloc->target_handle,
@@ -587,23 +611,12 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 
 	if (pagefault_disabled() && !object_is_idle(obj))
 		return -EFAULT;
 
-	if (use_cpu_reloc(obj))
-		ret = relocate_entry_cpu(obj, reloc, cache, target_offset);
-	else if (obj->map_and_fenceable)
-		ret = relocate_entry_gtt(obj, reloc, cache, target_offset);
-	else if (static_cpu_has(X86_FEATURE_CLFLUSH))
-		ret = relocate_entry_clflush(obj, reloc, cache, target_offset);
-	else {
-		WARN_ONCE(1, "Impossible case in relocation handling\n");
-		ret = -ENODEV;
-	}
-
+	ret = relocate_entry(obj, reloc, cache, target_offset);
 	if (ret)
 		return ret;
 
 	/* and update the user's relocation entry */
 	reloc->presumed_offset = target_offset;
-
 	return 0;
 }
@@ -619,7 +632,7 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
 	int remain, ret = 0;
 
 	user_relocs = u64_to_user_ptr(entry->relocs_ptr);
-	reloc_cache_init(&cache);
+	reloc_cache_init(&cache, eb->i915);
 
 	remain = entry->relocation_count;
 	while (remain) {
@@ -668,7 +681,7 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 	struct reloc_cache cache;
 	int i, ret = 0;
 
-	reloc_cache_init(&cache);
+	reloc_cache_init(&cache, eb->i915);
 	for (i = 0; i < entry->relocation_count; i++) {
 		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache);
 		if (ret)
@@ -1647,7 +1660,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	memset(&params_master, 0x00, sizeof(params_master));
 
-	eb = eb_create(args);
+	eb = eb_create(dev_priv, args);
 	if (eb == NULL) {
 		i915_gem_context_put(ctx);
 		mutex_unlock(&dev->struct_mutex);