Skip to content

Commit

Permalink
drm/i915: Allow tiling of objects with bit 17 swizzling by the CPU.
Browse files Browse the repository at this point in the history
Save the bit 17 state of the pages when freeing the page list, and
reswizzle them if necessary when rebinding the pages (in case they were
swapped out).  Since we have userland with expectations that the swizzle
enums let it pread and pwrite contents accurately, we can't expose a new
swizzle enum for bit 17 (which it would have to GTT map to handle), so we
handle it down in pread and pwrite by swizzling the copy when bit 17 of the
page address is set.

Signed-off-by: Eric Anholt <eric@anholt.net>
  • Loading branch information
Eric Anholt committed Apr 8, 2009
1 parent e5e9ecd commit 280b713
Show file tree
Hide file tree
Showing 4 changed files with 235 additions and 14 deletions.
5 changes: 5 additions & 0 deletions drivers/gpu/drm/i915/i915_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,9 @@ struct drm_i915_gem_object {
uint32_t tiling_mode;
uint32_t stride;

/** Record of address bit 17 of each page at last unbind. */
long *bit_17;

/** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */
uint32_t agp_type;

Expand Down Expand Up @@ -640,6 +643,8 @@ void i915_gem_object_put_pages(struct drm_gem_object *obj);

/* i915_gem_tiling.c */
void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj);
void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj);

/* i915_gem_debug.c */
void i915_gem_dump_object(struct drm_gem_object *obj, int len,
Expand Down
130 changes: 118 additions & 12 deletions drivers/gpu/drm/i915/i915_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,15 @@ fast_shmem_read(struct page **pages,
return 0;
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
{
drm_i915_private_t *dev_priv = obj->dev->dev_private;
struct drm_i915_gem_object *obj_priv = obj->driver_private;

return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
obj_priv->tiling_mode != I915_TILING_NONE;
}

static inline int
slow_shmem_copy(struct page *dst_page,
int dst_offset,
Expand Down Expand Up @@ -182,6 +191,64 @@ slow_shmem_copy(struct page *dst_page,
return 0;
}

static inline int
slow_shmem_bit17_copy(struct page *gpu_page,
int gpu_offset,
struct page *cpu_page,
int cpu_offset,
int length,
int is_read)
{
char *gpu_vaddr, *cpu_vaddr;

/* Use the unswizzled path if this page isn't affected. */
if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
if (is_read)
return slow_shmem_copy(cpu_page, cpu_offset,
gpu_page, gpu_offset, length);
else
return slow_shmem_copy(gpu_page, gpu_offset,
cpu_page, cpu_offset, length);
}

gpu_vaddr = kmap_atomic(gpu_page, KM_USER0);
if (gpu_vaddr == NULL)
return -ENOMEM;

cpu_vaddr = kmap_atomic(cpu_page, KM_USER1);
if (cpu_vaddr == NULL) {
kunmap_atomic(gpu_vaddr, KM_USER0);
return -ENOMEM;
}

/* Copy the data, XORing A6 with A17 (1). The user already knows he's
* XORing with the other bits (A9 for Y, A9 and A10 for X)
*/
while (length > 0) {
int cacheline_end = ALIGN(gpu_offset + 1, 64);
int this_length = min(cacheline_end - gpu_offset, length);
int swizzled_gpu_offset = gpu_offset ^ 64;

if (is_read) {
memcpy(cpu_vaddr + cpu_offset,
gpu_vaddr + swizzled_gpu_offset,
this_length);
} else {
memcpy(gpu_vaddr + swizzled_gpu_offset,
cpu_vaddr + cpu_offset,
this_length);
}
cpu_offset += this_length;
gpu_offset += this_length;
length -= this_length;
}

kunmap_atomic(cpu_vaddr, KM_USER1);
kunmap_atomic(gpu_vaddr, KM_USER0);

return 0;
}

/**
* This is the fast shmem pread path, which attempts to copy_from_user directly
* from the backing pages of the object to the user's address space. On a
Expand Down Expand Up @@ -270,6 +337,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
int page_length;
int ret;
uint64_t data_ptr = args->data_ptr;
int do_bit17_swizzling;

remain = args->size;

Expand All @@ -294,6 +362,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
goto fail_put_user_pages;
}

do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

mutex_lock(&dev->struct_mutex);

ret = i915_gem_object_get_pages(obj);
Expand Down Expand Up @@ -328,11 +398,20 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
if ((data_page_offset + page_length) > PAGE_SIZE)
page_length = PAGE_SIZE - data_page_offset;

ret = slow_shmem_copy(user_pages[data_page_index],
data_page_offset,
obj_priv->pages[shmem_page_index],
shmem_page_offset,
page_length);
if (do_bit17_swizzling) {
ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
shmem_page_offset,
user_pages[data_page_index],
data_page_offset,
page_length,
1);
} else {
ret = slow_shmem_copy(user_pages[data_page_index],
data_page_offset,
obj_priv->pages[shmem_page_index],
shmem_page_offset,
page_length);
}
if (ret)
goto fail_put_pages;

Expand Down Expand Up @@ -384,9 +463,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}

ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
if (ret != 0)
if (i915_gem_object_needs_bit17_swizzle(obj)) {
ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
} else {
ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
if (ret != 0)
ret = i915_gem_shmem_pread_slow(dev, obj, args,
file_priv);
}

drm_gem_object_unreference(obj);

Expand Down Expand Up @@ -728,6 +812,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
int page_length;
int ret;
uint64_t data_ptr = args->data_ptr;
int do_bit17_swizzling;

remain = args->size;

Expand All @@ -752,6 +837,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
goto fail_put_user_pages;
}

do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

mutex_lock(&dev->struct_mutex);

ret = i915_gem_object_get_pages(obj);
Expand Down Expand Up @@ -786,11 +873,20 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
if ((data_page_offset + page_length) > PAGE_SIZE)
page_length = PAGE_SIZE - data_page_offset;

ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
shmem_page_offset,
user_pages[data_page_index],
data_page_offset,
page_length);
if (do_bit17_swizzling) {
ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
shmem_page_offset,
user_pages[data_page_index],
data_page_offset,
page_length,
0);
} else {
ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
shmem_page_offset,
user_pages[data_page_index],
data_page_offset,
page_length);
}
if (ret)
goto fail_put_pages;

Expand Down Expand Up @@ -855,6 +951,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
file_priv);
}
} else if (i915_gem_object_needs_bit17_swizzle(obj)) {
ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
} else {
ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
if (ret == -EFAULT) {
Expand Down Expand Up @@ -1298,6 +1396,9 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
if (--obj_priv->pages_refcount != 0)
return;

if (obj_priv->tiling_mode != I915_TILING_NONE)
i915_gem_object_save_bit_17_swizzle(obj);

for (i = 0; i < page_count; i++)
if (obj_priv->pages[i] != NULL) {
if (obj_priv->dirty)
Expand Down Expand Up @@ -1923,6 +2024,10 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
}
obj_priv->pages[i] = page;
}

if (obj_priv->tiling_mode != I915_TILING_NONE)
i915_gem_object_do_bit_17_swizzle(obj);

return 0;
}

Expand Down Expand Up @@ -3601,6 +3706,7 @@ void i915_gem_free_object(struct drm_gem_object *obj)
i915_gem_free_mmap_offset(obj);

drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
kfree(obj_priv->bit_17);
drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
}

Expand Down
111 changes: 109 additions & 2 deletions drivers/gpu/drm/i915/i915_gem_tiling.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
*
*/

#include "linux/string.h"
#include "linux/bitops.h"
#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
Expand Down Expand Up @@ -127,8 +129,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
swizzle_y = I915_BIT_6_SWIZZLE_9_11;
} else {
/* Bit 17 swizzling by the CPU in addition. */
swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
swizzle_y = I915_BIT_6_SWIZZLE_9_17;
}
break;
}
Expand Down Expand Up @@ -288,6 +290,19 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
else
args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;

/* Hide bit 17 swizzling from the user. This prevents old Mesa
* from aborting the application on sw fallbacks to bit 17,
* and we use the pread/pwrite bit17 paths to swizzle for it.
* If there was a user that was relying on the swizzle
* information for drm_intel_bo_map()ed reads/writes this would
* break it, but we don't have any of those.
*/
if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;

/* If we can't handle the swizzling, make it untiled. */
if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) {
args->tiling_mode = I915_TILING_NONE;
Expand Down Expand Up @@ -354,8 +369,100 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
DRM_ERROR("unknown tiling mode\n");
}

/* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;

drm_gem_object_unreference(obj);
mutex_unlock(&dev->struct_mutex);

return 0;
}

/**
* Swap every 64 bytes of this page around, to account for it having a new
* bit 17 of its physical address and therefore being interpreted differently
* by the GPU.
*/
static int
i915_gem_swizzle_page(struct page *page)
{
char *vaddr;
int i;
char temp[64];

vaddr = kmap(page);
if (vaddr == NULL)
return -ENOMEM;

for (i = 0; i < PAGE_SIZE; i += 128) {
memcpy(temp, &vaddr[i], 64);
memcpy(&vaddr[i], &vaddr[i + 64], 64);
memcpy(&vaddr[i + 64], temp, 64);
}

kunmap(page);

return 0;
}

void
i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj_priv = obj->driver_private;
int page_count = obj->size >> PAGE_SHIFT;
int i;

if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17)
return;

if (obj_priv->bit_17 == NULL)
return;

for (i = 0; i < page_count; i++) {
char new_bit_17 = page_to_phys(obj_priv->pages[i]) >> 17;
if ((new_bit_17 & 0x1) !=
(test_bit(i, obj_priv->bit_17) != 0)) {
int ret = i915_gem_swizzle_page(obj_priv->pages[i]);
if (ret != 0) {
DRM_ERROR("Failed to swizzle page\n");
return;
}
set_page_dirty(obj_priv->pages[i]);
}
}
}

void
i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj)
{
struct drm_device *dev = obj->dev;
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj_priv = obj->driver_private;
int page_count = obj->size >> PAGE_SHIFT;
int i;

if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17)
return;

if (obj_priv->bit_17 == NULL) {
obj_priv->bit_17 = kmalloc(BITS_TO_LONGS(page_count) *
sizeof(long), GFP_KERNEL);
if (obj_priv->bit_17 == NULL) {
DRM_ERROR("Failed to allocate memory for bit 17 "
"record\n");
return;
}
}

for (i = 0; i < page_count; i++) {
if (page_to_phys(obj_priv->pages[i]) & (1 << 17))
__set_bit(i, obj_priv->bit_17);
else
__clear_bit(i, obj_priv->bit_17);
}
}
3 changes: 3 additions & 0 deletions include/drm/i915_drm.h
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,9 @@ struct drm_i915_gem_busy {
#define I915_BIT_6_SWIZZLE_9_10_11 4
/* Not seen by userland */
#define I915_BIT_6_SWIZZLE_UNKNOWN 5
/* Seen by userland. */
#define I915_BIT_6_SWIZZLE_9_17 6
#define I915_BIT_6_SWIZZLE_9_10_17 7

struct drm_i915_gem_set_tiling {
/** Handle of the buffer to have its tiling state updated */
Expand Down

0 comments on commit 280b713

Please sign in to comment.