Skip to content

Commit

Permalink
drm/ttm: add a way to bo_wait for either the last read or last write
Browse files Browse the repository at this point in the history
Sometimes we want to know whether a buffer is busy and wait for it (bo_wait).
However, sometimes it would be more useful to be able to query whether
a buffer is busy and being either read or written, and wait until it's stopped
being either read or written. The point of this is to be able to avoid
unnecessary waiting, e.g. if a GPU has written something to a buffer and is now
reading that buffer, and a CPU wants to map that buffer for read, it needs to
only wait for the last write. If there were no write, there wouldn't be any
waiting needed.

This, or course, requires user space drivers to send read/write flags
with each relocation (like we have read/write domains in radeon, so we can
actually use those for something useful now).

Now how this patch works:

The read/write flags should passed to ttm_validate_buffer. TTM maintains
separate sync objects of the last read and write for each buffer, in addition
to the sync object of the last use of a buffer. ttm_bo_wait then operates
with one the sync objects.

Signed-off-by: Marek Olšák <maraeo@gmail.com>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
  • Loading branch information
Marek Olšák authored and Dave Airlie committed Aug 31, 2011
1 parent b464e9a commit dfadbbd
Show file tree
Hide file tree
Showing 11 changed files with 139 additions and 39 deletions.
3 changes: 2 additions & 1 deletion drivers/gpu/drm/nouveau/nouveau_bo.c
Original file line number Diff line number Diff line change
Expand Up @@ -1104,7 +1104,8 @@ nouveau_bo_vma_del(struct nouveau_bo *nvbo, struct nouveau_vma *vma)
if (vma->node) {
if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) {
spin_lock(&nvbo->bo.bdev->fence_lock);
ttm_bo_wait(&nvbo->bo, false, false, false);
ttm_bo_wait(&nvbo->bo, false, false, false,
TTM_USAGE_READWRITE);
spin_unlock(&nvbo->bo.bdev->fence_lock);
nouveau_vm_unmap(vma);
}
Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/nouveau/nouveau_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,8 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev,
}

spin_lock(&nvbo->bo.bdev->fence_lock);
ret = ttm_bo_wait(&nvbo->bo, false, false, false);
ret = ttm_bo_wait(&nvbo->bo, false, false, false,
TTM_USAGE_READWRITE);
spin_unlock(&nvbo->bo.bdev->fence_lock);
if (ret) {
NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret);
Expand Down Expand Up @@ -825,7 +826,7 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
nvbo = nouveau_gem_object(gem);

spin_lock(&nvbo->bo.bdev->fence_lock);
ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait, TTM_USAGE_READWRITE);
spin_unlock(&nvbo->bo.bdev->fence_lock);
drm_gem_object_unreference_unlocked(gem);
return ret;
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/radeon/radeon_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
p->relocs[i].lobj.wdomain = r->write_domain;
p->relocs[i].lobj.rdomain = r->read_domains;
p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
p->relocs[i].lobj.tv.usage = TTM_USAGE_READWRITE;
p->relocs[i].handle = r->handle;
p->relocs[i].flags = r->flags;
radeon_bo_list_add_object(&p->relocs[i].lobj,
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/radeon/radeon_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
if (mem_type)
*mem_type = bo->tbo.mem.mem_type;
if (bo->tbo.sync_obj)
r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
r = ttm_bo_wait(&bo->tbo, true, true, no_wait, TTM_USAGE_READWRITE);
spin_unlock(&bo->tbo.bdev->fence_lock);
ttm_bo_unreserve(&bo->tbo);
return r;
Expand Down
97 changes: 71 additions & 26 deletions drivers/gpu/drm/ttm/ttm_bo.c
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
int ret;

spin_lock(&bdev->fence_lock);
(void) ttm_bo_wait(bo, false, false, true);
(void) ttm_bo_wait(bo, false, false, true, TTM_USAGE_READWRITE);
if (!bo->sync_obj) {

spin_lock(&glob->lru_lock);
Expand Down Expand Up @@ -566,7 +566,8 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,

retry:
spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);

if (unlikely(ret != 0))
Expand Down Expand Up @@ -725,7 +726,8 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
int ret = 0;

spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);

if (unlikely(ret != 0)) {
Expand Down Expand Up @@ -1072,7 +1074,8 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
* instead of doing it here.
*/
spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (ret)
return ret;
Expand Down Expand Up @@ -1692,34 +1695,83 @@ static int ttm_bo_setup_vm(struct ttm_buffer_object *bo)
return ret;
}

static void ttm_bo_unref_sync_obj_locked(struct ttm_buffer_object *bo,
void *sync_obj,
void **extra_sync_obj)
{
struct ttm_bo_device *bdev = bo->bdev;
struct ttm_bo_driver *driver = bdev->driver;
void *tmp_obj = NULL, *tmp_obj_read = NULL, *tmp_obj_write = NULL;

/* We must unref the sync obj wherever it's ref'd.
* Note that if we unref bo->sync_obj, we can unref both the read
* and write sync objs too, because they can't be newer than
* bo->sync_obj, so they are no longer relevant. */
if (sync_obj == bo->sync_obj ||
sync_obj == bo->sync_obj_read) {
tmp_obj_read = bo->sync_obj_read;
bo->sync_obj_read = NULL;
}
if (sync_obj == bo->sync_obj ||
sync_obj == bo->sync_obj_write) {
tmp_obj_write = bo->sync_obj_write;
bo->sync_obj_write = NULL;
}
if (sync_obj == bo->sync_obj) {
tmp_obj = bo->sync_obj;
bo->sync_obj = NULL;
}

clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
spin_unlock(&bdev->fence_lock);
if (tmp_obj)
driver->sync_obj_unref(&tmp_obj);
if (tmp_obj_read)
driver->sync_obj_unref(&tmp_obj_read);
if (tmp_obj_write)
driver->sync_obj_unref(&tmp_obj_write);
if (extra_sync_obj)
driver->sync_obj_unref(extra_sync_obj);
spin_lock(&bdev->fence_lock);
}

int ttm_bo_wait(struct ttm_buffer_object *bo,
bool lazy, bool interruptible, bool no_wait)
bool lazy, bool interruptible, bool no_wait,
enum ttm_buffer_usage usage)
{
struct ttm_bo_driver *driver = bo->bdev->driver;
struct ttm_bo_device *bdev = bo->bdev;
void *sync_obj;
void *sync_obj_arg;
int ret = 0;
void **bo_sync_obj;

if (likely(bo->sync_obj == NULL))
switch (usage) {
case TTM_USAGE_READ:
bo_sync_obj = &bo->sync_obj_read;
break;
case TTM_USAGE_WRITE:
bo_sync_obj = &bo->sync_obj_write;
break;
case TTM_USAGE_READWRITE:
default:
bo_sync_obj = &bo->sync_obj;
}

if (likely(*bo_sync_obj == NULL))
return 0;

while (bo->sync_obj) {
while (*bo_sync_obj) {

if (driver->sync_obj_signaled(bo->sync_obj, bo->sync_obj_arg)) {
void *tmp_obj = bo->sync_obj;
bo->sync_obj = NULL;
clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
spin_unlock(&bdev->fence_lock);
driver->sync_obj_unref(&tmp_obj);
spin_lock(&bdev->fence_lock);
if (driver->sync_obj_signaled(*bo_sync_obj, bo->sync_obj_arg)) {
ttm_bo_unref_sync_obj_locked(bo, *bo_sync_obj, NULL);
continue;
}

if (no_wait)
return -EBUSY;

sync_obj = driver->sync_obj_ref(bo->sync_obj);
sync_obj = driver->sync_obj_ref(*bo_sync_obj);
sync_obj_arg = bo->sync_obj_arg;
spin_unlock(&bdev->fence_lock);
ret = driver->sync_obj_wait(sync_obj, sync_obj_arg,
Expand All @@ -1730,16 +1782,9 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
return ret;
}
spin_lock(&bdev->fence_lock);
if (likely(bo->sync_obj == sync_obj &&
if (likely(*bo_sync_obj == sync_obj &&
bo->sync_obj_arg == sync_obj_arg)) {
void *tmp_obj = bo->sync_obj;
bo->sync_obj = NULL;
clear_bit(TTM_BO_PRIV_FLAG_MOVING,
&bo->priv_flags);
spin_unlock(&bdev->fence_lock);
driver->sync_obj_unref(&sync_obj);
driver->sync_obj_unref(&tmp_obj);
spin_lock(&bdev->fence_lock);
ttm_bo_unref_sync_obj_locked(bo, *bo_sync_obj, &sync_obj);
} else {
spin_unlock(&bdev->fence_lock);
driver->sync_obj_unref(&sync_obj);
Expand All @@ -1763,7 +1808,7 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
if (unlikely(ret != 0))
return ret;
spin_lock(&bdev->fence_lock);
ret = ttm_bo_wait(bo, false, true, no_wait);
ret = ttm_bo_wait(bo, false, true, no_wait, TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (likely(ret == 0))
atomic_inc(&bo->cpu_writers);
Expand Down Expand Up @@ -1837,7 +1882,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
*/

spin_lock(&bo->bdev->fence_lock);
ret = ttm_bo_wait(bo, false, false, false);
ret = ttm_bo_wait(bo, false, false, false, TTM_USAGE_READWRITE);
spin_unlock(&bo->bdev->fence_lock);

if (unlikely(ret != 0))
Expand Down
26 changes: 21 additions & 5 deletions drivers/gpu/drm/ttm/ttm_bo_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,8 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
atomic_set(&fbo->cpu_writers, 0);

fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj);
fbo->sync_obj_read = driver->sync_obj_ref(bo->sync_obj_read);
fbo->sync_obj_write = driver->sync_obj_ref(bo->sync_obj_write);
kref_init(&fbo->list_kref);
kref_init(&fbo->kref);
fbo->destroy = &ttm_transfered_destroy;
Expand Down Expand Up @@ -618,20 +620,30 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
struct ttm_mem_reg *old_mem = &bo->mem;
int ret;
struct ttm_buffer_object *ghost_obj;
void *tmp_obj = NULL;
void *tmp_obj = NULL, *tmp_obj_read = NULL, *tmp_obj_write = NULL;

spin_lock(&bdev->fence_lock);
if (bo->sync_obj) {
if (bo->sync_obj)
tmp_obj = bo->sync_obj;
bo->sync_obj = NULL;
}
if (bo->sync_obj_read)
tmp_obj_read = bo->sync_obj_read;
if (bo->sync_obj_write)
tmp_obj_write = bo->sync_obj_write;

bo->sync_obj = driver->sync_obj_ref(sync_obj);
bo->sync_obj_read = driver->sync_obj_ref(sync_obj);
bo->sync_obj_write = driver->sync_obj_ref(sync_obj);
bo->sync_obj_arg = sync_obj_arg;
if (evict) {
ret = ttm_bo_wait(bo, false, false, false);
ret = ttm_bo_wait(bo, false, false, false,
TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (tmp_obj)
driver->sync_obj_unref(&tmp_obj);
if (tmp_obj_read)
driver->sync_obj_unref(&tmp_obj_read);
if (tmp_obj_write)
driver->sync_obj_unref(&tmp_obj_write);
if (ret)
return ret;

Expand All @@ -655,6 +667,10 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
spin_unlock(&bdev->fence_lock);
if (tmp_obj)
driver->sync_obj_unref(&tmp_obj);
if (tmp_obj_read)
driver->sync_obj_unref(&tmp_obj_read);
if (tmp_obj_write)
driver->sync_obj_unref(&tmp_obj_write);

ret = ttm_buffer_object_transfer(bo, &ghost_obj);
if (ret)
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/ttm/ttm_bo_vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)

spin_lock(&bdev->fence_lock);
if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
ret = ttm_bo_wait(bo, false, true, false);
ret = ttm_bo_wait(bo, false, true, false, TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0)) {
retval = (ret != -ERESTARTSYS) ?
Expand Down
19 changes: 18 additions & 1 deletion drivers/gpu/drm/ttm/ttm_execbuf_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,18 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)

list_for_each_entry(entry, list, head) {
bo = entry->bo;
entry->old_sync_obj_read = NULL;
entry->old_sync_obj_write = NULL;
entry->old_sync_obj = bo->sync_obj;
bo->sync_obj = driver->sync_obj_ref(sync_obj);
if (entry->usage & TTM_USAGE_READ) {
entry->old_sync_obj_read = bo->sync_obj_read;
bo->sync_obj_read = driver->sync_obj_ref(sync_obj);
}
if (entry->usage & TTM_USAGE_WRITE) {
entry->old_sync_obj_write = bo->sync_obj_write;
bo->sync_obj_write = driver->sync_obj_ref(sync_obj);
}
bo->sync_obj_arg = entry->new_sync_obj_arg;
ttm_bo_unreserve_locked(bo);
entry->reserved = false;
Expand All @@ -231,8 +241,15 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
spin_unlock(&bdev->fence_lock);

list_for_each_entry(entry, list, head) {
if (entry->old_sync_obj)
if (entry->old_sync_obj) {
driver->sync_obj_unref(&entry->old_sync_obj);
}
if (entry->old_sync_obj_read) {
driver->sync_obj_unref(&entry->old_sync_obj_read);
}
if (entry->old_sync_obj_write) {
driver->sync_obj_unref(&entry->old_sync_obj_write);
}
}
}
EXPORT_SYMBOL(ttm_eu_fence_buffer_objects);
1 change: 1 addition & 0 deletions drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
if (unlikely(cur_validate_node == sw_context->cur_val_buf)) {
val_buf = &sw_context->val_bufs[cur_validate_node];
val_buf->bo = ttm_bo_reference(bo);
val_buf->usage = TTM_USAGE_READWRITE;
val_buf->new_sync_obj_arg = (void *) dev_priv;
list_add_tail(&val_buf->head, &sw_context->validate_nodes);
++sw_context->cur_val_buf;
Expand Down
16 changes: 14 additions & 2 deletions include/drm/ttm/ttm_bo_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ struct ttm_bo_device;

struct drm_mm_node;

enum ttm_buffer_usage {
TTM_USAGE_READ = 1,
TTM_USAGE_WRITE = 2,
TTM_USAGE_READWRITE = TTM_USAGE_READ | TTM_USAGE_WRITE
};

/**
* struct ttm_placement
Expand Down Expand Up @@ -174,7 +179,10 @@ struct ttm_tt;
* the bo_device::lru_lock.
* @reserved: Deadlock-free lock used for synchronization state transitions.
* @sync_obj_arg: Opaque argument to synchronization object function.
* @sync_obj: Pointer to a synchronization object.
* @sync_obj: Pointer to a synchronization object of a last read or write,
* whichever is later.
* @sync_obj_read: Pointer to a synchronization object of a last read.
* @sync_obj_write: Pointer to a synchronization object of a last write.
* @priv_flags: Flags describing buffer object internal state.
* @vm_rb: Rb node for the vm rb tree.
* @vm_node: Address space manager node.
Expand Down Expand Up @@ -258,6 +266,8 @@ struct ttm_buffer_object {

void *sync_obj_arg;
void *sync_obj;
void *sync_obj_read;
void *sync_obj_write;
unsigned long priv_flags;

/**
Expand Down Expand Up @@ -325,6 +335,7 @@ ttm_bo_reference(struct ttm_buffer_object *bo)
* @bo: The buffer object.
* @interruptible: Use interruptible wait.
* @no_wait: Return immediately if buffer is busy.
* @usage: Whether to wait for the last read and/or the last write.
*
* This function must be called with the bo::mutex held, and makes
* sure any previous rendering to the buffer is completed.
Expand All @@ -334,7 +345,8 @@ ttm_bo_reference(struct ttm_buffer_object *bo)
* Returns -ERESTARTSYS if interrupted by a signal.
*/
extern int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy,
bool interruptible, bool no_wait);
bool interruptible, bool no_wait,
enum ttm_buffer_usage usage);
/**
* ttm_bo_validate
*
Expand Down
Loading

0 comments on commit dfadbbd

Please sign in to comment.