Skip to content

Commit

Permalink
drm/amdgpu: Use unique doorbell range per xcc
Browse files Browse the repository at this point in the history
Program different ranges in each XCC with CP_MEC_DOORBELL_RANGE_LOWER/UPPER.
Keeping the same range causes the CPF in other XCCs to become busy as well when
an IB packet is submitted to a KCQ. Only the XCC which processes the packet
comes back to idle afterwards, and this causes the other CPs not to be idle.
This in turn affects clockgating behavior, as the RLC doesn't get the idle
interrupt.

LOWER/UPPER cover only KIQ/KCQs, which are per-XCC queues. Assigning
different ranges doesn't seem to have any side effects, as user queue ranges
are outside of this range. User queue tests - PM4 through KFD and AQL
through rocr - have the same results after this change.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Lijo Lazar authored and Alex Deucher committed Jun 9, 2023
1 parent 7389c75 commit 233bb37
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 25 deletions.
35 changes: 20 additions & 15 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ struct amdgpu_doorbell_index {
uint32_t max_assignment;
/* Per engine SDMA doorbell size in dword */
uint32_t sdma_doorbell_range;
/* Per xcc doorbell size for KIQ/KCQ */
uint32_t xcc_doorbell_range;
};

typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
Expand Down Expand Up @@ -309,28 +311,31 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_INVALID = 0xFFFF
} AMDGPU_DOORBELL64_ASSIGNMENT;

typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1
{
/* KIQ: 0~7 for maximum 8 XCD */
AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000,
AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x008,
AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x009,
/* Compute: 0x0A ~ 0x49 */
AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x00A,
AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x049,
AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x04A,
AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x0C9,
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 {
/* Per-XCC window of 0x20 doorbells: XCC0: 0x00 ~ 0x1F, XCC1: 0x20 ~ 0x3F ... */

/* KIQ/HIQ/DIQ */
AMDGPU_DOORBELL_LAYOUT1_KIQ_START = 0x000,
AMDGPU_DOORBELL_LAYOUT1_HIQ = 0x001,
AMDGPU_DOORBELL_LAYOUT1_DIQ = 0x002,
/* Compute rings: 0x08 ~ 0x0F, user queues: 0x10 ~ 0x1F */
AMDGPU_DOORBELL_LAYOUT1_MEC_RING_START = 0x008,
AMDGPU_DOORBELL_LAYOUT1_MEC_RING_END = 0x00F,
AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START = 0x010,
AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END = 0x01F,
AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE = 0x020,

/* SDMA: 0x100 ~ 0x19F */
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100,
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START = 0x100,
AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F,
/* IH: 0x1A0 ~ 0x1AF */
AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0,
/* VCN: 0x1B0 ~ 0x1D4 */
AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0,
AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4,

AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END,
AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START,
AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END,

AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4,
AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF
Expand Down
5 changes: 4 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,10 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
ring->use_doorbell = true;
ring->xcc_id = xcc_id;
ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
ring->doorbell_index = (adev->doorbell_index.kiq + xcc_id) << 1;
ring->doorbell_index =
(adev->doorbell_index.kiq +
xcc_id * adev->doorbell_index.xcc_doorbell_range)
<< 1;

r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
if (r)
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)

adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_START;
adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL_LAYOUT1_USERQUEUE_END;
adev->doorbell_index.xcc_doorbell_range = AMDGPU_DOORBELL_LAYOUT1_XCC_RANGE;

adev->doorbell_index.sdma_doorbell_range = 20;
for (i = 0; i < adev->sdma.num_instances; i++)
Expand Down
31 changes: 22 additions & 9 deletions drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -729,8 +729,10 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id,
unsigned irq_type;
struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
unsigned int hw_prio;
uint32_t xcc_doorbell_start;

ring = &adev->gfx.compute_ring[ring_id];
ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings +
ring_id];

/* mec0 is me1 */
ring->xcc_id = xcc_id;
Expand All @@ -740,9 +742,12 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id,

ring->ring_obj = NULL;
ring->use_doorbell = true;
ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+ (ring_id * GFX9_MEC_HPD_SIZE);
xcc_doorbell_start = adev->doorbell_index.mec_ring0 +
xcc_id * adev->doorbell_index.xcc_doorbell_range;
ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1;
ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
(ring_id + xcc_id * adev->gfx.num_compute_rings) *
GFX9_MEC_HPD_SIZE;
ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
sprintf(ring->name, "comp_%d.%d.%d.%d",
ring->xcc_id, ring->me, ring->pipe, ring->queue);
Expand Down Expand Up @@ -801,8 +806,8 @@ static int gfx_v9_4_3_sw_init(void *handle)
}

/* set up the compute queues - allocate horizontally across pipes */
ring_id = 0;
for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
ring_id = 0;
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec;
Expand Down Expand Up @@ -1654,10 +1659,18 @@ static int gfx_v9_4_3_xcc_kiq_init_register(struct amdgpu_ring *ring,

/* enable the doorbell if requested */
if (ring->use_doorbell) {
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_LOWER,
(adev->doorbell_index.kiq * 2) << 2);
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_UPPER,
(adev->doorbell_index.userqueue_end * 2) << 2);
WREG32_SOC15(
GC, GET_INST(GC, xcc_id),
regCP_MEC_DOORBELL_RANGE_LOWER,
((adev->doorbell_index.kiq +
xcc_id * adev->doorbell_index.xcc_doorbell_range) *
2) << 2);
WREG32_SOC15(
GC, GET_INST(GC, xcc_id),
regCP_MEC_DOORBELL_RANGE_UPPER,
((adev->doorbell_index.userqueue_end +
xcc_id * adev->doorbell_index.xcc_doorbell_range) *
2) << 2);
}

WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
Expand Down

0 comments on commit 233bb37

Please sign in to comment.