Skip to content

Commit

Permalink
drm/radeon: rework ring syncing code
Browse files Browse the repository at this point in the history
Move inter ring syncing with semaphores into the
existing ring allocations, with that we need to
lock the ring mutex only once.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
  • Loading branch information
Christian König committed Jun 21, 2012
1 parent 68e250b commit 220907d
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 116 deletions.
3 changes: 2 additions & 1 deletion drivers/gpu/drm/radeon/evergreen_blit_kms.c
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,8 @@ int evergreen_blit_init(struct radeon_device *rdev)
rdev->r600_blit.primitives.draw_auto = draw_auto;
rdev->r600_blit.primitives.set_default_state = set_default_state;

rdev->r600_blit.ring_size_common = 55; /* shaders + def state */
rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
rdev->r600_blit.ring_size_common += 55; /* shaders + def state */
rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */
rdev->r600_blit.ring_size_common += 5; /* done copy */
rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */
Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/radeon/r600.c
Original file line number Diff line number Diff line change
Expand Up @@ -2311,15 +2311,16 @@ int r600_copy_blit(struct radeon_device *rdev,
unsigned num_gpu_pages,
struct radeon_fence **fence)
{
struct radeon_semaphore *sem = NULL;
struct radeon_sa_bo *vb = NULL;
int r;

r = r600_blit_prepare_copy(rdev, num_gpu_pages, &vb);
r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem);
if (r) {
return r;
}
r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb);
r600_blit_done_copy(rdev, fence, vb);
r600_blit_done_copy(rdev, fence, vb, sem);
return 0;
}

Expand Down
24 changes: 21 additions & 3 deletions drivers/gpu/drm/radeon/r600_blit_kms.c
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,8 @@ int r600_blit_init(struct radeon_device *rdev)
rdev->r600_blit.primitives.draw_auto = draw_auto;
rdev->r600_blit.primitives.set_default_state = set_default_state;

rdev->r600_blit.ring_size_common = 40; /* shaders + def state */
rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
rdev->r600_blit.ring_size_common += 40; /* shaders + def state */
rdev->r600_blit.ring_size_common += 5; /* done copy */
rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */

Expand Down Expand Up @@ -666,7 +667,8 @@ static unsigned r600_blit_create_rect(unsigned num_gpu_pages,


int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages,
struct radeon_sa_bo **vb)
struct radeon_fence **fence, struct radeon_sa_bo **vb,
struct radeon_semaphore **sem)
{
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
int r;
Expand All @@ -689,22 +691,37 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages,
return r;
}

r = radeon_semaphore_create(rdev, sem);
if (r) {
radeon_sa_bo_free(rdev, vb, NULL);
return r;
}

/* calculate number of loops correctly */
ring_size = num_loops * dwords_per_loop;
ring_size += rdev->r600_blit.ring_size_common;
r = radeon_ring_lock(rdev, ring, ring_size);
if (r) {
radeon_sa_bo_free(rdev, vb, NULL);
radeon_semaphore_free(rdev, sem, NULL);
return r;
}

if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) {
radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring,
RADEON_RING_TYPE_GFX_INDEX);
radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX);
} else {
radeon_semaphore_free(rdev, sem, NULL);
}

rdev->r600_blit.primitives.set_default_state(rdev);
rdev->r600_blit.primitives.set_shaders(rdev);
return 0;
}

void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence,
struct radeon_sa_bo *vb)
struct radeon_sa_bo *vb, struct radeon_semaphore *sem)
{
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
int r;
Expand All @@ -717,6 +734,7 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence

radeon_ring_unlock_commit(rdev, ring);
radeon_sa_bo_free(rdev, &vb, *fence);
radeon_semaphore_free(rdev, &sem, *fence);
}

void r600_kms_blit_copy(struct radeon_device *rdev,
Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/radeon/radeon.h
Original file line number Diff line number Diff line change
Expand Up @@ -470,10 +470,9 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
struct radeon_semaphore *semaphore);
int radeon_semaphore_sync_rings(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
bool sync_to[RADEON_NUM_RINGS],
int dst_ring);
int signaler, int waiter);
void radeon_semaphore_free(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
struct radeon_semaphore **semaphore,
struct radeon_fence *fence);

/*
Expand Down Expand Up @@ -653,6 +652,7 @@ struct radeon_ib {
struct radeon_fence *fence;
unsigned vm_id;
bool is_const_ib;
struct radeon_fence *sync_to[RADEON_NUM_RINGS];
struct radeon_semaphore *semaphore;
};

Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/radeon/radeon_asic.h
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,10 @@ int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder);
void r600_hdmi_update_audio_settings(struct drm_encoder *encoder);
/* r600 blit */
int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages,
struct radeon_sa_bo **vb);
struct radeon_fence **fence, struct radeon_sa_bo **vb,
struct radeon_semaphore **sem);
void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence,
struct radeon_sa_bo *vb);
struct radeon_sa_bo *vb, struct radeon_semaphore *sem);
void r600_kms_blit_copy(struct radeon_device *rdev,
u64 src_gpu_addr, u64 dst_gpu_addr,
unsigned num_gpu_pages,
Expand Down
38 changes: 8 additions & 30 deletions drivers/gpu/drm/radeon/radeon_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,36 +115,20 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
return 0;
}

static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
bool sync_to_ring[RADEON_NUM_RINGS] = { };
bool need_sync = false;
int i, r;
int i;

for (i = 0; i < p->nrelocs; i++) {
struct radeon_fence *fence;
struct radeon_fence *a, *b;

if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj)
continue;

fence = p->relocs[i].robj->tbo.sync_obj;
if (fence->ring != p->ring && !radeon_fence_signaled(fence)) {
sync_to_ring[fence->ring] = true;
need_sync = true;
}
}

if (!need_sync) {
return 0;
}

r = radeon_semaphore_create(p->rdev, &p->ib.semaphore);
if (r) {
return r;
a = p->relocs[i].robj->tbo.sync_obj;
b = p->ib.sync_to[a->ring];
p->ib.sync_to[a->ring] = radeon_fence_later(a, b);
}

return radeon_semaphore_sync_rings(p->rdev, p->ib.semaphore,
sync_to_ring, p->ring);
}

/* XXX: note that this is called from the legacy UMS CS ioctl as well */
Expand Down Expand Up @@ -368,10 +352,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
DRM_ERROR("Invalid command stream !\n");
return r;
}
r = radeon_cs_sync_rings(parser);
if (r) {
DRM_ERROR("Failed to synchronize rings !\n");
}
radeon_cs_sync_rings(parser);
parser->ib.vm_id = 0;
r = radeon_ib_schedule(rdev, &parser->ib);
if (r) {
Expand Down Expand Up @@ -468,10 +449,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
if (r) {
goto out;
}
r = radeon_cs_sync_rings(parser);
if (r) {
DRM_ERROR("Failed to synchronize rings !\n");
}
radeon_cs_sync_rings(parser);

if ((rdev->family >= CHIP_TAHITI) &&
(parser->chunk_const_ib_idx != -1)) {
Expand Down
30 changes: 25 additions & 5 deletions drivers/gpu/drm/radeon/radeon_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,36 +42,43 @@ int radeon_debugfs_sa_init(struct radeon_device *rdev);
int radeon_ib_get(struct radeon_device *rdev, int ring,
struct radeon_ib *ib, unsigned size)
{
int r;
int i, r;

r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true);
if (r) {
dev_err(rdev->dev, "failed to get a new IB (%d)\n", r);
return r;
}

r = radeon_semaphore_create(rdev, &ib->semaphore);
if (r) {
return r;
}

ib->ring = ring;
ib->fence = NULL;
ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo);
ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo);
ib->vm_id = 0;
ib->is_const_ib = false;
ib->semaphore = NULL;
for (i = 0; i < RADEON_NUM_RINGS; ++i)
ib->sync_to[i] = NULL;

return 0;
}

void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
{
radeon_semaphore_free(rdev, ib->semaphore, ib->fence);
radeon_semaphore_free(rdev, &ib->semaphore, ib->fence);
radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence);
radeon_fence_unref(&ib->fence);
}

int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
int r = 0;
bool need_sync = false;
int i, r = 0;

if (!ib->length_dw || !ring->ready) {
/* TODO: Nothings in the ib we should report. */
Expand All @@ -80,11 +87,24 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
}

/* 64 dwords should be enough for fence too */
r = radeon_ring_lock(rdev, ring, 64);
r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8);
if (r) {
dev_err(rdev->dev, "scheduling IB failed (%d).\n", r);
return r;
}
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
struct radeon_fence *fence = ib->sync_to[i];
if (radeon_fence_need_sync(fence, ib->ring)) {
need_sync = true;
radeon_semaphore_sync_rings(rdev, ib->semaphore,
fence->ring, ib->ring);
radeon_fence_note_sync(fence, ib->ring);
}
}
/* immediately free semaphore when we don't need to sync */
if (!need_sync) {
radeon_semaphore_free(rdev, &ib->semaphore, NULL);
}
radeon_ring_ib_execute(rdev, ib->ring, ib);
r = radeon_fence_emit(rdev, &ib->fence, ib->ring);
if (r) {
Expand Down
71 changes: 25 additions & 46 deletions drivers/gpu/drm/radeon/radeon_semaphore.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,70 +68,49 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true);
}

/* caller must hold ring lock */
int radeon_semaphore_sync_rings(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
bool sync_to[RADEON_NUM_RINGS],
int dst_ring)
int signaler, int waiter)
{
int i = 0, r;
int r;

mutex_lock(&rdev->ring_lock);
r = radeon_ring_alloc(rdev, &rdev->ring[dst_ring], RADEON_NUM_RINGS * 8);
if (r) {
goto error;
/* no need to signal and wait on the same ring */
if (signaler == waiter) {
return 0;
}

for (i = 0; i < RADEON_NUM_RINGS; ++i) {
/* no need to sync to our own or unused rings */
if (!sync_to[i] || i == dst_ring)
continue;

/* prevent GPU deadlocks */
if (!rdev->ring[i].ready) {
dev_err(rdev->dev, "Trying to sync to a disabled ring!");
r = -EINVAL;
goto error;
}

r = radeon_ring_alloc(rdev, &rdev->ring[i], 8);
if (r) {
goto error;
}

radeon_semaphore_emit_signal(rdev, i, semaphore);
radeon_semaphore_emit_wait(rdev, dst_ring, semaphore);
/* prevent GPU deadlocks */
if (!rdev->ring[signaler].ready) {
dev_err(rdev->dev, "Trying to sync to a disabled ring!");
return -EINVAL;
}

radeon_ring_commit(rdev, &rdev->ring[i]);
r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8);
if (r) {
return r;
}
radeon_semaphore_emit_signal(rdev, signaler, semaphore);
radeon_ring_commit(rdev, &rdev->ring[signaler]);

radeon_ring_commit(rdev, &rdev->ring[dst_ring]);
mutex_unlock(&rdev->ring_lock);
/* we assume caller has already allocated space on waiters ring */
radeon_semaphore_emit_wait(rdev, waiter, semaphore);

return 0;

error:
/* unlock all locks taken so far */
for (--i; i >= 0; --i) {
if (sync_to[i] || i == dst_ring) {
radeon_ring_undo(&rdev->ring[i]);
}
}
radeon_ring_undo(&rdev->ring[dst_ring]);
mutex_unlock(&rdev->ring_lock);
return r;
}

void radeon_semaphore_free(struct radeon_device *rdev,
struct radeon_semaphore *semaphore,
struct radeon_semaphore **semaphore,
struct radeon_fence *fence)
{
if (semaphore == NULL) {
if (semaphore == NULL || *semaphore == NULL) {
return;
}
if (semaphore->waiters > 0) {
if ((*semaphore)->waiters > 0) {
dev_err(rdev->dev, "semaphore %p has more waiters than signalers,"
" hardware lockup imminent!\n", semaphore);
" hardware lockup imminent!\n", *semaphore);
}
radeon_sa_bo_free(rdev, &semaphore->sa_bo, fence);
kfree(semaphore);
radeon_sa_bo_free(rdev, &(*semaphore)->sa_bo, fence);
kfree(*semaphore);
*semaphore = NULL;
}
6 changes: 2 additions & 4 deletions drivers/gpu/drm/radeon/radeon_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
}

out_cleanup:
if (semaphore)
radeon_semaphore_free(rdev, semaphore, NULL);
radeon_semaphore_free(rdev, &semaphore, NULL);

if (fence1)
radeon_fence_unref(&fence1);
Expand Down Expand Up @@ -422,8 +421,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev,
}

out_cleanup:
if (semaphore)
radeon_semaphore_free(rdev, semaphore, NULL);
radeon_semaphore_free(rdev, &semaphore, NULL);

if (fenceA)
radeon_fence_unref(&fenceA);
Expand Down
Loading

0 comments on commit 220907d

Please sign in to comment.