Skip to content

Commit

Permalink
drm/amdgpu: Reduce dequeue retry timeout for gfx9 family
Browse files Browse the repository at this point in the history
Dequeue retry timeout controls the interval between checks for unmet
conditions. On MI series, reduce this from 0x40 to 0x1 (~ 1 uS). The
cost of additional bandwidth consumed by CP when polling memory
shouldn't be substantial.

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Harish Kasiviswanathan authored and Alex Deucher committed Mar 14, 2025
1 parent 02fc2f3 commit 8a7820c
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 52 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v9_hqd_reset,
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,8 +541,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings =
kgd_gfx_v9_program_trap_handler_settings,
.build_grace_period_packet_info =
kgd_gfx_v9_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info =
kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
Expand Down
28 changes: 14 additions & 14 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
Original file line number Diff line number Diff line change
Expand Up @@ -1021,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
}

void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t sch_wave,
uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;

/*
* The CP cannont handle a 0 grace period input and will result in
* an infinite grace period being set so set to 1 to prevent this.
*/
if (grace_period == 0)
grace_period = 1;

*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
grace_period);
if (sch_wave)
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
sch_wave);
if (que_sleep)
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
QUE_SLEEP,
que_sleep);

*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
Expand Down Expand Up @@ -1115,7 +1115,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.set_address_watch = kgd_gfx_v10_set_address_watch,
.clear_address_watch = kgd_gfx_v10_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
.program_trap_handler_settings = program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v10_hqd_reset,
Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t sch_wave,
uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
Expand Down
28 changes: 14 additions & 14 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
Original file line number Diff line number Diff line change
Expand Up @@ -1077,25 +1077,25 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
adev->gfx.cu_info.max_waves_per_simd;
}

void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t sch_wave,
uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data)
{
*reg_data = wait_times;

/*
* The CP cannot handle a 0 grace period input and will result in
* an infinite grace period being set so set to 1 to prevent this.
*/
if (grace_period == 0)
grace_period = 1;

*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
grace_period);
if (sch_wave)
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
SCH_WAVE,
sch_wave);
if (que_sleep)
*reg_data = REG_SET_FIELD(*reg_data,
CP_IQ_WAIT_TIME2,
QUE_SLEEP,
que_sleep);

*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
}
Expand Down Expand Up @@ -1255,7 +1255,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
.set_address_watch = kgd_gfx_v9_set_address_watch,
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,10 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t sch_wave,
uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
Expand Down
43 changes: 30 additions & 13 deletions drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,13 +298,14 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
}

static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm,
uint32_t sch_value, uint32_t *reg_offset,
uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset,
uint32_t *reg_data)
{
pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info(
pm->dqm->dev->adev,
pm->dqm->wait_times,
sch_value,
que_sleep,
reg_offset,
reg_data);
}
Expand All @@ -319,27 +320,43 @@ static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
uint32_t reg_data = 0;

switch (cmd) {
case KFD_DEQUEUE_WAIT_INIT:
/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
pm_build_dequeue_wait_counts_packet_info(pm, 1, &reg_offset, &reg_data);
else
case KFD_DEQUEUE_WAIT_INIT: {
uint32_t sch_wave = 0, que_sleep = 0;
/* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
* On a 1GHz machine this is roughly 1 microsecond, which is
* about how long it takes to load data out of memory during
* queue connect
* QUE_SLEEP: Wait Count for Dequeue Retry.
*/
if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) &&
KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) {
que_sleep = 1;

/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
sch_wave = 1;
} else {
return 0;
}
pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep,
&reg_offset, &reg_data);

break;
}
case KFD_DEQUEUE_WAIT_RESET:
/* function called only to get reg_offset */
pm_build_dequeue_wait_counts_packet_info(pm, 0, &reg_offset, &reg_data);
reg_data = pm->dqm->wait_times;
/* reg_data would be set to dqm->wait_times */
pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, &reg_offset, &reg_data);
break;

case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
/* The CP cannot handle value 0 and it will result in
* an infinite grace period being set so set to 1 to prevent this.
* an infinite grace period being set so set to 1 to prevent this. Also
* avoid debugger API breakage as it sets 0 and expects a low value.
*/
if (!value)
value = 1;
pm_build_dequeue_wait_counts_packet_info(pm, value, &reg_offset, &reg_data);
pm_build_dequeue_wait_counts_packet_info(pm, value, 0, &reg_offset, &reg_data);
break;
default:
pr_err("Invalid dequeue wait cmd\n");
Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/amd/include/kgd_kfd_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,9 +313,10 @@ struct kfd2kgd_calls {
void (*get_iq_wait_times)(struct amdgpu_device *adev,
uint32_t *wait_times,
uint32_t inst);
void (*build_grace_period_packet_info)(struct amdgpu_device *adev,
void (*build_dequeue_wait_counts_packet_info)(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t sch_wave,
uint32_t que_sleep,
uint32_t *reg_offset,
uint32_t *reg_data);
void (*get_cu_occupancy)(struct amdgpu_device *adev,
Expand Down

0 comments on commit 8a7820c

Please sign in to comment.