Skip to content

Commit

Permalink
drm/radeon: use IBs for VM page table updates v2
Browse files Browse the repository at this point in the history
For very large page table updates, we can exceed the
size of the ring.  To avoid this, use an IB to perform
the page table update.

v2(ck): cleanup the IB infrastructure and the use it instead
        of filling the struct ourself.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
  • Loading branch information
Alex Deucher committed Feb 1, 2013
1 parent 24178ec commit 43f1214
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 92 deletions.
30 changes: 17 additions & 13 deletions drivers/gpu/drm/radeon/ni.c
Original file line number Diff line number Diff line change
Expand Up @@ -1946,19 +1946,21 @@ uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
* cayman_vm_set_page - update the page tables using the CP
*
* @rdev: radeon_device pointer
* @ib: indirect buffer to fill with commands
* @pe: addr of the page entry
* @addr: dst addr to write into pe
* @count: number of page entries to update
* @incr: increase next addr by incr bytes
* @flags: access flags
*
* Update the page tables using the CP (cayman-si).
* Update the page tables using the CP (cayman/TN).
*/
void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
void cayman_vm_set_page(struct radeon_device *rdev,
struct radeon_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
uint64_t value;
unsigned ndw;
Expand All @@ -1969,9 +1971,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
if (ndw > 0x3FFF)
ndw = 0x3FFF;

radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
ib->ptr[ib->length_dw++] = pe;
ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
for (; ndw > 1; ndw -= 2, --count, pe += 8) {
if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr);
Expand All @@ -1983,8 +1985,8 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
}
addr += incr;
value |= r600_flags;
radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value));
ib->ptr[ib->length_dw++] = value;
ib->ptr[ib->length_dw++] = upper_32_bits(value);
}
}
} else {
Expand All @@ -1994,9 +1996,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
ndw = 0xFFFFE;

/* for non-physically contiguous pages (system) */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw));
radeon_ring_write(ring, pe);
radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
ib->ptr[ib->length_dw++] = pe;
ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
for (; ndw > 0; ndw -= 2, --count, pe += 8) {
if (flags & RADEON_VM_PAGE_SYSTEM) {
value = radeon_vm_map_gart(rdev, addr);
Expand All @@ -2008,10 +2010,12 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
}
addr += incr;
value |= r600_flags;
radeon_ring_write(ring, value);
radeon_ring_write(ring, upper_32_bits(value));
ib->ptr[ib->length_dw++] = value;
ib->ptr[ib->length_dw++] = upper_32_bits(value);
}
}
while (ib->length_dw & 0x7)
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
}
}

Expand Down
7 changes: 5 additions & 2 deletions drivers/gpu/drm/radeon/radeon.h
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
struct radeon_ib *ib, struct radeon_vm *vm,
unsigned size);
void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence);
int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
struct radeon_ib *const_ib);
int radeon_ib_pool_init(struct radeon_device *rdev);
Expand Down Expand Up @@ -1188,7 +1189,9 @@ struct radeon_asic {
void (*fini)(struct radeon_device *rdev);

u32 pt_ring_index;
void (*set_page)(struct radeon_device *rdev, uint64_t pe,
void (*set_page)(struct radeon_device *rdev,
struct radeon_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
} vm;
Expand Down Expand Up @@ -1810,7 +1813,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
#define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p))
#define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
#define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
#define radeon_asic_vm_set_page(rdev, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (pe), (addr), (count), (incr), (flags)))
#define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
#define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
#define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp))
#define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp))
Expand Down
8 changes: 6 additions & 2 deletions drivers/gpu/drm/radeon/radeon_asic.h
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,9 @@ int cayman_vm_init(struct radeon_device *rdev);
void cayman_vm_fini(struct radeon_device *rdev);
void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags);
void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
void cayman_vm_set_page(struct radeon_device *rdev,
struct radeon_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
Expand Down Expand Up @@ -506,7 +508,9 @@ int si_irq_set(struct radeon_device *rdev);
int si_irq_process(struct radeon_device *rdev);
int si_vm_init(struct radeon_device *rdev);
void si_vm_fini(struct radeon_device *rdev);
void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
void si_vm_set_page(struct radeon_device *rdev,
struct radeon_ib *ib,
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
Expand Down
19 changes: 4 additions & 15 deletions drivers/gpu/drm/radeon/radeon_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,18 +125,6 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
return 0;
}

static void radeon_cs_sync_to(struct radeon_cs_parser *p,
struct radeon_fence *fence)
{
struct radeon_fence *other;

if (!fence)
return;

other = p->ib.sync_to[fence->ring];
p->ib.sync_to[fence->ring] = radeon_fence_later(fence, other);
}

static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
{
int i;
Expand All @@ -145,7 +133,7 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
if (!p->relocs[i].robj)
continue;

radeon_cs_sync_to(p, p->relocs[i].robj->tbo.sync_obj);
radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj);
}
}

Expand Down Expand Up @@ -472,8 +460,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
goto out;
}
radeon_cs_sync_rings(parser);
radeon_cs_sync_to(parser, vm->fence);
radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring));
radeon_ib_sync_to(&parser->ib, vm->fence);
radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id(
rdev, vm, parser->ring));

if ((rdev->family >= CHIP_TAHITI) &&
(parser->chunk_const_ib_idx != -1)) {
Expand Down
60 changes: 24 additions & 36 deletions drivers/gpu/drm/radeon/radeon_gart.c
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,7 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
*/
static int radeon_vm_update_pdes(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_ib *ib,
uint64_t start, uint64_t end)
{
static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
Expand Down Expand Up @@ -971,7 +972,7 @@ static int radeon_vm_update_pdes(struct radeon_device *rdev,
((last_pt + incr * count) != pt)) {

if (count) {
radeon_asic_vm_set_page(rdev, last_pde,
radeon_asic_vm_set_page(rdev, ib, last_pde,
last_pt, count, incr,
RADEON_VM_PAGE_VALID);
}
Expand All @@ -985,7 +986,7 @@ static int radeon_vm_update_pdes(struct radeon_device *rdev,
}

if (count) {
radeon_asic_vm_set_page(rdev, last_pde, last_pt, count,
radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
incr, RADEON_VM_PAGE_VALID);

}
Expand All @@ -1009,6 +1010,7 @@ static int radeon_vm_update_pdes(struct radeon_device *rdev,
*/
static void radeon_vm_update_ptes(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_ib *ib,
uint64_t start, uint64_t end,
uint64_t dst, uint32_t flags)
{
Expand Down Expand Up @@ -1038,7 +1040,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
if ((last_pte + 8 * count) != pte) {

if (count) {
radeon_asic_vm_set_page(rdev, last_pte,
radeon_asic_vm_set_page(rdev, ib, last_pte,
last_dst, count,
RADEON_GPU_PAGE_SIZE,
flags);
Expand All @@ -1056,7 +1058,8 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
}

if (count) {
radeon_asic_vm_set_page(rdev, last_pte, last_dst, count,
radeon_asic_vm_set_page(rdev, ib, last_pte,
last_dst, count,
RADEON_GPU_PAGE_SIZE, flags);
}
}
Expand All @@ -1080,8 +1083,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
struct ttm_mem_reg *mem)
{
unsigned ridx = rdev->asic->vm.pt_ring_index;
struct radeon_ring *ring = &rdev->ring[ridx];
struct radeon_semaphore *sem = NULL;
struct radeon_ib ib;
struct radeon_bo_va *bo_va;
unsigned nptes, npdes, ndw;
uint64_t addr;
Expand Down Expand Up @@ -1124,25 +1126,13 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
bo_va->valid = false;
}

if (vm->fence && radeon_fence_signaled(vm->fence)) {
radeon_fence_unref(&vm->fence);
}

if (vm->fence && vm->fence->ring != ridx) {
r = radeon_semaphore_create(rdev, &sem);
if (r) {
return r;
}
}

nptes = radeon_bo_ngpu_pages(bo);

/* assume two extra pdes in case the mapping overlaps the borders */
npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;

/* estimate number of dw needed */
/* semaphore, fence and padding */
ndw = 32;
/* padding, etc. */
ndw = 64;

if (RADEON_VM_BLOCK_SIZE > 11)
/* reserve space for one header for every 2k dwords */
Expand All @@ -1161,33 +1151,31 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
/* reserve space for pde addresses */
ndw += npdes * 2;

r = radeon_ring_lock(rdev, ring, ndw);
if (r) {
return r;
}
/* update too big for an IB */
if (ndw > 0xfffff)
return -ENOMEM;

if (sem && radeon_fence_need_sync(vm->fence, ridx)) {
radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx);
radeon_fence_note_sync(vm->fence, ridx);
}
r = radeon_ib_get(rdev, ridx, &ib, NULL, ndw * 4);
ib.length_dw = 0;

r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset);
r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_ib_free(rdev, &ib);
return r;
}

radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset,
radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
addr, bo_va->flags);

radeon_fence_unref(&vm->fence);
r = radeon_fence_emit(rdev, &vm->fence, ridx);
radeon_ib_sync_to(&ib, vm->fence);
r = radeon_ib_schedule(rdev, &ib, NULL);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
radeon_ib_free(rdev, &ib);
return r;
}
radeon_ring_unlock_commit(rdev, ring);
radeon_semaphore_free(rdev, &sem, vm->fence);
radeon_fence_unref(&vm->fence);
vm->fence = radeon_fence_ref(ib.fence);
radeon_ib_free(rdev, &ib);
radeon_fence_unref(&vm->last_flush);

return 0;
Expand Down
19 changes: 19 additions & 0 deletions drivers/gpu/drm/radeon/radeon_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,25 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
radeon_fence_unref(&ib->fence);
}

/**
* radeon_ib_sync_to - sync to fence before executing the IB
*
* @ib: IB object to add fence to
* @fence: fence to sync to
*
* Sync to the fence before executing the IB
*/
void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence)
{
struct radeon_fence *other;

if (!fence)
return;

other = ib->sync_to[fence->ring];
ib->sync_to[fence->ring] = radeon_fence_later(fence, other);
}

/**
* radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring
*
Expand Down
Loading

0 comments on commit 43f1214

Please sign in to comment.