Skip to content

Commit

Permalink
drm/radeon/kms: Add initial support for async DMA on SI
Browse files Browse the repository at this point in the history
Pretty much the same as cayman.  Some changes to the copy
packets.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Alex Deucher committed Dec 10, 2012
1 parent f60cbd1 commit 8c5fd7e
Show file tree
Hide file tree
Showing 5 changed files with 254 additions and 5 deletions.
4 changes: 4 additions & 0 deletions drivers/gpu/drm/radeon/radeon.h
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,10 @@ void r600_dma_stop(struct radeon_device *rdev);
int r600_dma_resume(struct radeon_device *rdev);
void r600_dma_fini(struct radeon_device *rdev);

void cayman_dma_stop(struct radeon_device *rdev);
int cayman_dma_resume(struct radeon_device *rdev);
void cayman_dma_fini(struct radeon_device *rdev);

/*
* CS.
*/
Expand Down
24 changes: 22 additions & 2 deletions drivers/gpu/drm/radeon/radeon_asic.c
Original file line number Diff line number Diff line change
Expand Up @@ -1731,6 +1731,26 @@ static struct radeon_asic si_asic = {
.ib_test = &r600_ib_test,
.is_lockup = &si_gpu_is_lockup,
.vm_flush = &si_vm_flush,
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &si_dma_vm_flush,
},
[CAYMAN_RING_TYPE_DMA1_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &si_dma_vm_flush,
}
},
.irq = {
Expand All @@ -1747,8 +1767,8 @@ static struct radeon_asic si_asic = {
.copy = {
.blit = NULL,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = NULL,
.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = &si_copy_dma,
.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
.copy = NULL,
.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
},
Expand Down
5 changes: 5 additions & 0 deletions drivers/gpu/drm/radeon/radeon_asic.h
Original file line number Diff line number Diff line change
Expand Up @@ -501,5 +501,10 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
uint64_t si_get_gpu_clock(struct radeon_device *rdev);
int si_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence);
void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);

#endif
179 changes: 176 additions & 3 deletions drivers/gpu/drm/radeon/si.c
Original file line number Diff line number Diff line change
Expand Up @@ -1660,6 +1660,8 @@ static void si_gpu_init(struct radeon_device *rdev)
WREG32(GB_ADDR_CONFIG, gb_addr_config);
WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
WREG32(HDP_ADDR_CONFIG, gb_addr_config);
WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);

si_tiling_mode_table_init(rdev);

Expand Down Expand Up @@ -1836,6 +1838,9 @@ static void si_cp_enable(struct radeon_device *rdev, bool enable)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
WREG32(SCRATCH_UMSK, 0);
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
}
udelay(50);
}
Expand Down Expand Up @@ -2891,6 +2896,32 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
radeon_ring_write(ring, 0x0);
}

void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
struct radeon_ring *ring = &rdev->ring[ridx];

if (vm == NULL)
return;

radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
if (vm->id < 8) {
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
} else {
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
}
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

/* flush hdp cache */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
radeon_ring_write(ring, 1);

/* bits 0-7 are the VM contexts0-7 */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
radeon_ring_write(ring, 1 << vm->id);
}

/*
* RLC
*/
Expand Down Expand Up @@ -3059,6 +3090,10 @@ static void si_disable_interrupt_state(struct radeon_device *rdev)
WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
WREG32(CP_INT_CNTL_RING1, 0);
WREG32(CP_INT_CNTL_RING2, 0);
tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
WREG32(GRBM_INT_CNTL, 0);
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
Expand Down Expand Up @@ -3178,6 +3213,7 @@ int si_irq_set(struct radeon_device *rdev)
u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
u32 grbm_int_cntl = 0;
u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
u32 dma_cntl, dma_cntl1;

if (!rdev->irq.installed) {
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
Expand All @@ -3198,6 +3234,9 @@ int si_irq_set(struct radeon_device *rdev)
hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

/* enable CP interrupts on all rings */
if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
DRM_DEBUG("si_irq_set: sw int gfx\n");
Expand All @@ -3211,6 +3250,15 @@ int si_irq_set(struct radeon_device *rdev)
DRM_DEBUG("si_irq_set: sw int cp2\n");
cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
}
if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
DRM_DEBUG("si_irq_set: sw int dma\n");
dma_cntl |= TRAP_ENABLE;
}

if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
DRM_DEBUG("si_irq_set: sw int dma1\n");
dma_cntl1 |= TRAP_ENABLE;
}
if (rdev->irq.crtc_vblank_int[0] ||
atomic_read(&rdev->irq.pflip[0])) {
DRM_DEBUG("si_irq_set: vblank 0\n");
Expand Down Expand Up @@ -3270,6 +3318,9 @@ int si_irq_set(struct radeon_device *rdev)
WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

WREG32(GRBM_INT_CNTL, grbm_int_cntl);

WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
Expand Down Expand Up @@ -3728,9 +3779,17 @@ int si_irq_process(struct radeon_device *rdev)
break;
}
break;
case 224: /* DMA trap event */
DRM_DEBUG("IH: DMA trap\n");
radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
break;
case 233: /* GUI IDLE */
DRM_DEBUG("IH: GUI idle\n");
break;
case 244: /* DMA trap event */
DRM_DEBUG("IH: DMA1 trap\n");
radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
break;
default:
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
break;
Expand All @@ -3754,6 +3813,80 @@ int si_irq_process(struct radeon_device *rdev)
return IRQ_HANDLED;
}

/**
* si_copy_dma - copy pages using the DMA engine
*
* @rdev: radeon_device pointer
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @num_gpu_pages: number of GPU pages to xfer
* @fence: radeon fence object
*
* Copy GPU paging using the DMA engine (SI).
* Used by the radeon ttm implementation to move pages if
* registered as the asic copy callback.
*/
int si_copy_dma(struct radeon_device *rdev,
uint64_t src_offset, uint64_t dst_offset,
unsigned num_gpu_pages,
struct radeon_fence **fence)
{
struct radeon_semaphore *sem = NULL;
int ring_index = rdev->asic->copy.dma_ring_index;
struct radeon_ring *ring = &rdev->ring[ring_index];
u32 size_in_bytes, cur_size_in_bytes;
int i, num_loops;
int r = 0;

r = radeon_semaphore_create(rdev, &sem);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
return r;
}

size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
if (r) {
DRM_ERROR("radeon: moving bo (%d).\n", r);
radeon_semaphore_free(rdev, &sem, NULL);
return r;
}

if (radeon_fence_need_sync(*fence, ring->idx)) {
radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
ring->idx);
radeon_fence_note_sync(*fence, ring->idx);
} else {
radeon_semaphore_free(rdev, &sem, NULL);
}

for (i = 0; i < num_loops; i++) {
cur_size_in_bytes = size_in_bytes;
if (cur_size_in_bytes > 0xFFFFF)
cur_size_in_bytes = 0xFFFFF;
size_in_bytes -= cur_size_in_bytes;
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
radeon_ring_write(ring, dst_offset & 0xffffffff);
radeon_ring_write(ring, src_offset & 0xffffffff);
radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
}

r = radeon_fence_emit(rdev, fence, ring->idx);
if (r) {
radeon_ring_unlock_undo(rdev, ring);
return r;
}

radeon_ring_unlock_commit(rdev, ring);
radeon_semaphore_free(rdev, &sem, *fence);

return r;
}

/*
* startup/shutdown callbacks
*/
Expand Down Expand Up @@ -3825,6 +3958,18 @@ static int si_startup(struct radeon_device *rdev)
return r;
}

r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
if (r) {
dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
return r;
}

r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
if (r) {
dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
return r;
}

/* Enable IRQ */
r = si_irq_init(rdev);
if (r) {
Expand Down Expand Up @@ -3855,13 +4000,33 @@ static int si_startup(struct radeon_device *rdev)
if (r)
return r;

ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
if (r)
return r;

ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
if (r)
return r;

r = si_cp_load_microcode(rdev);
if (r)
return r;
r = si_cp_resume(rdev);
if (r)
return r;

r = cayman_dma_resume(rdev);
if (r)
return r;

r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
Expand Down Expand Up @@ -3903,9 +4068,7 @@ int si_resume(struct radeon_device *rdev)
int si_suspend(struct radeon_device *rdev)
{
si_cp_enable(rdev, false);
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
cayman_dma_stop(rdev);
si_irq_suspend(rdev);
radeon_wb_disable(rdev);
si_pcie_gart_disable(rdev);
Expand Down Expand Up @@ -3983,6 +4146,14 @@ int si_init(struct radeon_device *rdev)
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 1024 * 1024);

ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 64 * 1024);

ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 64 * 1024);

rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);

Expand All @@ -3995,6 +4166,7 @@ int si_init(struct radeon_device *rdev)
if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n");
si_cp_fini(rdev);
cayman_dma_fini(rdev);
si_irq_fini(rdev);
si_rlc_fini(rdev);
radeon_wb_fini(rdev);
Expand Down Expand Up @@ -4023,6 +4195,7 @@ void si_fini(struct radeon_device *rdev)
r600_blit_fini(rdev);
#endif
si_cp_fini(rdev);
cayman_dma_fini(rdev);
si_irq_fini(rdev);
si_rlc_fini(rdev);
radeon_wb_fini(rdev);
Expand Down
Loading

0 comments on commit 8c5fd7e

Please sign in to comment.