Skip to content

Commit

Permalink
drm/i915: Make the physical object coherent with GTT
Browse files Browse the repository at this point in the history
Currently objects for which the hardware needs a contiguous physical
address are allocated a shadow backing storage to satisfy the contraint.
This shadow buffer is not wired into the normal obj->pages and so the
physical object is incoherent with accesses via the GPU, GTT and CPU. By
setting up the appropriate scatter-gather table, we can allow userspace
to access the physical object via either a GTT mmaping of or by rendering
into the GEM bo. However, keeping the CPU mmap of the shmemfs backing
storage coherent with the contiguous shadow is not yet possible.
Fortuituously, CPU mmaps of objects requiring physical addresses are not
expected to be coherent anyway.

This allows the physical constraint of the GEM object to be transparent
to userspace and allow it to efficiently render into or update them via
the GTT and GPU.

v2: Fix leak of pci handle spotted by Ville
v3: Remove the now duplicate call to detach_phys_object during free.
v4: Wait for rendering before pwrite. As this patch makes it possible to
render into the phys object, we should make it correct as well!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
  • Loading branch information
Chris Wilson authored and Daniel Vetter committed Nov 14, 2014
1 parent 132f3f1 commit 6a2c423
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 67 deletions.
3 changes: 3 additions & 0 deletions drivers/gpu/drm/i915/i915_dma.c
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_CMD_PARSER_VERSION:
value = i915_cmd_parser_get_version();
break;
case I915_PARAM_HAS_COHERENT_PHYS_GTT:
value = 1;
break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/i915/i915_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -1957,10 +1957,10 @@ struct drm_i915_gem_object {
unsigned long user_pin_count;
struct drm_file *pin_filp;

/** for phy allocated objects */
struct drm_dma_handle *phys_handle;

union {
/** for phy allocated objects */
struct drm_dma_handle *phys_handle;

struct i915_gem_userptr {
uintptr_t ptr;
unsigned read_only :1;
Expand Down
207 changes: 143 additions & 64 deletions drivers/gpu/drm/i915/i915_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,50 +208,145 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
return 0;
}

static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
static int
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
{
drm_dma_handle_t *phys = obj->phys_handle;
struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
char *vaddr = obj->phys_handle->vaddr;
struct sg_table *st;
struct scatterlist *sg;
int i;

if (!phys)
return;
if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
return -EINVAL;

for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
struct page *page;
char *src;

page = shmem_read_mapping_page(mapping, i);
if (IS_ERR(page))
return PTR_ERR(page);

src = kmap_atomic(page);
memcpy(vaddr, src, PAGE_SIZE);
drm_clflush_virt_range(vaddr, PAGE_SIZE);
kunmap_atomic(src);

page_cache_release(page);
vaddr += PAGE_SIZE;
}

i915_gem_chipset_flush(obj->base.dev);

st = kmalloc(sizeof(*st), GFP_KERNEL);
if (st == NULL)
return -ENOMEM;

if (sg_alloc_table(st, 1, GFP_KERNEL)) {
kfree(st);
return -ENOMEM;
}

sg = st->sgl;
sg->offset = 0;
sg->length = obj->base.size;

if (obj->madv == I915_MADV_WILLNEED) {
sg_dma_address(sg) = obj->phys_handle->busaddr;
sg_dma_len(sg) = obj->base.size;

obj->pages = st;
obj->has_dma_mapping = true;
return 0;
}

static void
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
{
int ret;

BUG_ON(obj->madv == __I915_MADV_PURGED);

ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret) {
/* In the event of a disaster, abandon all caches and
* hope for the best.
*/
WARN_ON(ret != -EIO);
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}

if (obj->madv == I915_MADV_DONTNEED)
obj->dirty = 0;

if (obj->dirty) {
struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
char *vaddr = phys->vaddr;
char *vaddr = obj->phys_handle->vaddr;
int i;

for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
struct page *page = shmem_read_mapping_page(mapping, i);
if (!IS_ERR(page)) {
char *dst = kmap_atomic(page);
memcpy(dst, vaddr, PAGE_SIZE);
drm_clflush_virt_range(dst, PAGE_SIZE);
kunmap_atomic(dst);

set_page_dirty(page);
struct page *page;
char *dst;

page = shmem_read_mapping_page(mapping, i);
if (IS_ERR(page))
continue;

dst = kmap_atomic(page);
drm_clflush_virt_range(vaddr, PAGE_SIZE);
memcpy(dst, vaddr, PAGE_SIZE);
kunmap_atomic(dst);

set_page_dirty(page);
if (obj->madv == I915_MADV_WILLNEED)
mark_page_accessed(page);
page_cache_release(page);
}
page_cache_release(page);
vaddr += PAGE_SIZE;
}
i915_gem_chipset_flush(obj->base.dev);
obj->dirty = 0;
}

#ifdef CONFIG_X86
set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
#endif
drm_pci_free(obj->base.dev, phys);
obj->phys_handle = NULL;
sg_free_table(obj->pages);
kfree(obj->pages);

obj->has_dma_mapping = false;
}

static void
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
{
drm_pci_free(obj->base.dev, obj->phys_handle);
}

static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
.get_pages = i915_gem_object_get_pages_phys,
.put_pages = i915_gem_object_put_pages_phys,
.release = i915_gem_object_release_phys,
};

static int
drop_pages(struct drm_i915_gem_object *obj)
{
struct i915_vma *vma, *next;
int ret;

drm_gem_object_reference(&obj->base);
list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
if (i915_vma_unbind(vma))
break;

ret = i915_gem_object_put_pages(obj);
drm_gem_object_unreference(&obj->base);

return ret;
}

int
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
int align)
{
drm_dma_handle_t *phys;
struct address_space *mapping;
char *vaddr;
int i;
int ret;

if (obj->phys_handle) {
if ((unsigned long)obj->phys_handle->vaddr & (align -1))
Expand All @@ -266,41 +361,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
if (obj->base.filp == NULL)
return -EINVAL;

ret = drop_pages(obj);
if (ret)
return ret;

/* create a new object */
phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
if (!phys)
return -ENOMEM;

vaddr = phys->vaddr;
#ifdef CONFIG_X86
set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
#endif
mapping = file_inode(obj->base.filp)->i_mapping;
for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
struct page *page;
char *src;

page = shmem_read_mapping_page(mapping, i);
if (IS_ERR(page)) {
#ifdef CONFIG_X86
set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
#endif
drm_pci_free(obj->base.dev, phys);
return PTR_ERR(page);
}

src = kmap_atomic(page);
memcpy(vaddr, src, PAGE_SIZE);
kunmap_atomic(src);

mark_page_accessed(page);
page_cache_release(page);

vaddr += PAGE_SIZE;
}

obj->phys_handle = phys;
return 0;
obj->ops = &i915_gem_phys_ops;

return i915_gem_object_get_pages(obj);
}

static int
Expand All @@ -311,6 +384,14 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
struct drm_device *dev = obj->base.dev;
void *vaddr = obj->phys_handle->vaddr + args->offset;
char __user *user_data = to_user_ptr(args->data_ptr);
int ret;

/* We manually control the domain here and pretend that it
* remains coherent i.e. in the GTT domain, like shmem_pwrite.
*/
ret = i915_gem_object_wait_rendering(obj, false);
if (ret)
return ret;

if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
unsigned long unwritten;
Expand All @@ -326,6 +407,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
return -EFAULT;
}

drm_clflush_virt_range(vaddr, args->size);
i915_gem_chipset_flush(dev);
return 0;
}
Expand Down Expand Up @@ -1046,11 +1128,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
* pread/pwrite currently are reading and writing from the CPU
* perspective, requiring manual detiling by the client.
*/
if (obj->phys_handle) {
ret = i915_gem_phys_pwrite(obj, args, file);
goto out;
}

if (obj->tiling_mode == I915_TILING_NONE &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
cpu_write_needs_clflush(obj)) {
Expand All @@ -1060,8 +1137,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
* textures). Fallback to the shmem path in that case. */
}

if (ret == -EFAULT || ret == -ENOSPC)
ret = i915_gem_shmem_pwrite(dev, obj, args, file);
if (ret == -EFAULT || ret == -ENOSPC) {
if (obj->phys_handle)
ret = i915_gem_phys_pwrite(obj, args, file);
else
ret = i915_gem_shmem_pwrite(dev, obj, args, file);
}

out:
drm_gem_object_unreference(&obj->base);
Expand Down Expand Up @@ -3509,7 +3590,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
* Stolen memory is always coherent with the GPU as it is explicitly
* marked as wc by the system, or the system is cache-coherent.
*/
if (obj->stolen)
if (obj->stolen || obj->phys_handle)
return false;

/* If the GPU is snooping the contents of the CPU cache,
Expand Down Expand Up @@ -4471,8 +4552,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
}
}

i915_gem_object_detach_phys(obj);

/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
* before progressing. */
if (obj->stolen)
Expand Down
1 change: 1 addition & 0 deletions include/uapi/drm/i915_drm.h
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26
#define I915_PARAM_HAS_WT 27
#define I915_PARAM_CMD_PARSER_VERSION 28
#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29

typedef struct drm_i915_getparam {
int param;
Expand Down

0 comments on commit 6a2c423

Please sign in to comment.