From 3855f1d925d4f0971e35ec8c44f62862f78500fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 7 Mar 2025 09:57:45 -0500 Subject: [PATCH 01/68] drm/amd/display: allow 256B DCC max compressed block sizes on gfx12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hw supports it. Signed-off-by: Marek Olšák Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ce08c428ba4c..653f2bc77530 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -122,9 +122,10 @@ * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement) * - 3.61.0 - Contains fix for RV/PCO compute queues * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT + * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 62 +#define KMS_DRIVER_MINOR 63 #define KMS_DRIVER_PATCHLEVEL 0 /* diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 0090e08d5057..3e0f45f1711c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -700,7 +700,7 @@ static void amdgpu_dm_plane_add_gfx12_modifiers(struct amdgpu_device *adev, uint64_t mod_4k = ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_4K_2D); uint64_t mod_256b = ver | AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX12_256B_2D); uint64_t dcc = ver | AMD_FMT_MOD_SET(DCC, 1); - uint8_t max_comp_block[] = {1, 0}; + uint8_t max_comp_block[] = {2, 1, 0}; uint64_t max_comp_block_mod[ARRAY_SIZE(max_comp_block)] = {0}; uint8_t i = 0, j = 0; uint64_t gfx12_modifiers[] = {mod_256k, mod_64k, mod_4k, mod_256b, DRM_FORMAT_MOD_LINEAR}; From 2c01befe4a2707302eb1c97b955d94d66fac7b6f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Mar 2025 17:57:43 -0500 Subject: [PATCH 02/68] drm/amdgpu/vcn: fix idle work handler for VCN 2.5 VCN 2.5 uses the PG callback to enable VCN DPM which is a global state. As such, we need to make sure all instances are in the same state. v2: switch to a ref count (Lijo) v3: switch to its own idle work handler v4: fix logic in DPG handling Fixes: 4ce4fe27205c ("drm/amdgpu/vcn: use per instance callbacks for idle work handler") Reviewed-by: Boyuan Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 120 +++++++++++++++++++++++++- 1 file changed, 116 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index dff1a8859036..ff03436698a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -107,6 +107,115 @@ static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN1 }; +static void vcn_v2_5_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_vcn_inst *vcn_inst = + container_of(work, struct amdgpu_vcn_inst, idle_work.work); + struct amdgpu_device *adev = vcn_inst->adev; + unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; + unsigned int i, j; + int r = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *v = &adev->vcn.inst[i]; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + + for (j = 0; j < v->num_enc_rings; ++j) + fence[i] += amdgpu_fence_count_emitted(&v->ring_enc[j]); + + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !v->using_unified_queue) { + struct dpg_pause_state new_state; + + if (fence[i] || + unlikely(atomic_read(&v->dpg_enc_submission_cnt))) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + + v->pause_dpg_mode(v, &new_state); + } + + fence[i] += amdgpu_fence_count_emitted(&v->ring_dec); + fences += fence[i]; + + } + + if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) { + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_GATE); + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + false); + if (r) + dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + } else { + schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT); + } +} + +static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *v = &adev->vcn.inst[ring->me]; + int r = 0; + + atomic_inc(&adev->vcn.inst[0].total_submission_cnt); + + if (!cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work)) { + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + true); + if (r) + dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); + } + + mutex_lock(&adev->vcn.inst[0].vcn_pg_lock); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); + + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !v->using_unified_queue) { + struct dpg_pause_state new_state; + + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { + atomic_inc(&v->dpg_enc_submission_cnt); + new_state.fw_based = VCN_DPG_STATE__PAUSE; + } else { + unsigned int fences = 0; + unsigned int i; + + for (i = 0; i < v->num_enc_rings; ++i) + fences += amdgpu_fence_count_emitted(&v->ring_enc[i]); + + if (fences || atomic_read(&v->dpg_enc_submission_cnt)) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + } + v->pause_dpg_mode(v, &new_state); + } + mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock); +} + +static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && + !adev->vcn.inst[ring->me].using_unified_queue) + atomic_dec(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt); + + atomic_dec(&adev->vcn.inst[0].total_submission_cnt); + + schedule_delayed_work(&adev->vcn.inst[0].idle_work, + VCN_IDLE_TIMEOUT); +} + /** * vcn_v2_5_early_init - set function pointers and load microcode * @@ -201,6 +310,9 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + /* Override the work func */ + adev->vcn.inst[j].idle_work.work.func = vcn_v2_5_idle_work_handler; + amdgpu_vcn_setup_ucode(adev, j); r = amdgpu_vcn_resume(adev, j); @@ -1661,8 +1773,8 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .insert_start = vcn_v2_0_dec_ring_insert_start, .insert_end = vcn_v2_0_dec_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, + .begin_use = vcn_v2_5_ring_begin_use, + .end_use = vcn_v2_5_ring_end_use, .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, @@ -1759,8 +1871,8 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .insert_nop = amdgpu_ring_insert_nop, .insert_end = vcn_v2_0_enc_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, + .begin_use = vcn_v2_5_ring_begin_use, + .end_use = vcn_v2_5_ring_end_use, .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, From ed962f8d0603da15c26f1c9ce60cba42607a2768 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Tue, 11 Feb 2025 12:56:01 -0500 Subject: [PATCH 03/68] drm/amdkfd: Add pm_config_dequeue_wait_counts API Update pm_update_grace_period() to more cleaner pm_config_dequeue_wait_counts(). Previously, grace_period variable was overloaded as a variable and a macro, making it inflexible to configure additional dequeue wait times. pm_config_dequeue_wait_counts() now takes in a cmd / variable. This allows flexibility to update different dequeue wait times. Signed-off-by: Harish Kasiviswanathan Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 45 ++++++-------- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 11 +++- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 26 +++++++- .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 59 ++++++++++++++----- .../drm/amd/amdkfd/kfd_packet_manager_vi.c | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 28 +++++++-- 6 files changed, 123 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 2afcc1b4856a..885e0e9cf21b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -43,6 +43,8 @@ /* Size of the per-pipe EOP queue */ #define CIK_HPD_EOP_BYTES_LOG2 11 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) +/* See unmap_queues_cpsch() */ +#define USE_DEFAULT_GRACE_PERIOD 0xffffffff static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, unsigned int vmid); @@ -1746,10 +1748,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) init_sdma_bitmaps(dqm); - if (dqm->dev->kfd2kgd->get_iq_wait_times) - dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, - &dqm->wait_times, - ffs(dqm->dev->xcc_mask) - 1); + update_dqm_wait_times(dqm); return 0; } @@ -1845,25 +1844,11 @@ static int start_cpsch(struct device_queue_manager *dqm) /* clear hang status when driver try to start the hw scheduler */ dqm->sched_running = true; - if (!dqm->dev->kfd->shared_resources.enable_mes) + if (!dqm->dev->kfd->shared_resources.enable_mes) { + if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, + KFD_DEQUEUE_WAIT_INIT, 0 /* unused */)) + dev_err(dev, "Setting optimized dequeue wait failed. Using default values\n"); execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); - - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ - if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu && - (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) { - uint32_t reg_offset = 0; - uint32_t grace_period = 1; - - retval = pm_update_grace_period(&dqm->packet_mgr, - grace_period); - if (retval) - dev_err(dev, "Setting grace timeout failed\n"); - else if (dqm->dev->kfd2kgd->build_grace_period_packet_info) - /* Update dqm->wait_times maintained in software */ - dqm->dev->kfd2kgd->build_grace_period_packet_info( - dqm->dev->adev, dqm->wait_times, - grace_period, ®_offset, - &dqm->wait_times); } /* setup per-queue reset detection buffer */ @@ -2359,7 +2344,14 @@ static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sd return is_sdma ? reset_hung_queues_sdma(dqm) : reset_hung_queues(dqm); } -/* dqm->lock mutex has to be locked before calling this function */ +/* dqm->lock mutex has to be locked before calling this function + * + * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time + * for context switch latency. Lower values are used by debugger + * since context switching are triggered at high frequency. + * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE + * + */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, uint32_t filter_param, @@ -2378,7 +2370,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, return -EIO; if (grace_period != USE_DEFAULT_GRACE_PERIOD) { - retval = pm_update_grace_period(&dqm->packet_mgr, grace_period); + retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr, + KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period); if (retval) goto out; } @@ -2419,8 +2412,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, /* We need to reset the grace period value for this device */ if (grace_period != USE_DEFAULT_GRACE_PERIOD) { - if (pm_update_grace_period(&dqm->packet_mgr, - USE_DEFAULT_GRACE_PERIOD)) + if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, + KFD_DEQUEUE_WAIT_RESET, 0 /* unused */)) dev_err(dev, "Failed to reset grace period\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 122eb745e9c4..74a61b5b2f0b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -37,7 +37,6 @@ #define KFD_MES_PROCESS_QUANTUM 100000 #define KFD_MES_GANG_QUANTUM 10000 -#define USE_DEFAULT_GRACE_PERIOD 0xffffffff struct device_process_node { struct qcm_process_device *qpd; @@ -360,4 +359,14 @@ static inline int read_sdma_queue_counter(uint64_t __user *q_rptr, uint64_t *val /* SDMA activity counter is stored at queue's RPTR + 0x8 location. */ return get_user(*val, q_rptr + 1); } + +static inline void update_dqm_wait_times(struct device_queue_manager *dqm) +{ + if (dqm->dev->kfd2kgd->get_iq_wait_times) + dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, + &dqm->wait_times, + ffs(dqm->dev->xcc_mask) - 1); +} + + #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 4984b41cd372..3f574d82b5fc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -396,14 +396,29 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, return retval; } -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) +/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts + * by writing to CP_IQ_WAIT_TIME2 registers. + * + * @cmd: See emum kfd_config_dequeue_wait_counts_cmd definition + * @value: Depends on the cmd. This parameter is unused for + * KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For + * KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds value to be set + * + */ +int pm_config_dequeue_wait_counts(struct packet_manager *pm, + enum kfd_config_dequeue_wait_counts_cmd cmd, + uint32_t value) { struct kfd_node *node = pm->dqm->dev; struct device *dev = node->adev->dev; int retval = 0; uint32_t *buffer, size; - size = pm->pmf->set_grace_period_size; + if (!pm->pmf->config_dequeue_wait_counts || + !pm->pmf->config_dequeue_wait_counts_size) + return 0; + + size = pm->pmf->config_dequeue_wait_counts_size; mutex_lock(&pm->lock); @@ -419,13 +434,18 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) goto out; } - retval = pm->pmf->set_grace_period(pm, buffer, grace_period); + retval = pm->pmf->config_dequeue_wait_counts(pm, buffer, + cmd, value); if (!retval) retval = kq_submit_packet(pm->priv_queue); else kq_rollback_packet(pm->priv_queue); } + /* If default value is modified, cache that value in dqm->wait_times */ + if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) + update_dqm_wait_times(pm->dqm); + out: mutex_unlock(&pm->lock); return retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index d56525201155..b9c611b249e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -297,23 +297,54 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, return 0; } -static int pm_set_grace_period_v9(struct packet_manager *pm, +static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm, + uint32_t sch_value, uint32_t *reg_offset, + uint32_t *reg_data) +{ + pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( + pm->dqm->dev->adev, + pm->dqm->wait_times, + sch_value, + reg_offset, + reg_data); +} + +static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, uint32_t *buffer, - uint32_t grace_period) + enum kfd_config_dequeue_wait_counts_cmd cmd, + uint32_t value) { struct pm4_mec_write_data_mmio *packet; uint32_t reg_offset = 0; uint32_t reg_data = 0; - pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( - pm->dqm->dev->adev, - pm->dqm->wait_times, - grace_period, - ®_offset, - ®_data); - - if (grace_period == USE_DEFAULT_GRACE_PERIOD) + switch (cmd) { + case KFD_DEQUEUE_WAIT_INIT: + /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ + if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && + (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) + pm_build_dequeue_wait_counts_packet_info(pm, 1, ®_offset, ®_data); + else + return 0; + break; + case KFD_DEQUEUE_WAIT_RESET: + /* function called only to get reg_offset */ + pm_build_dequeue_wait_counts_packet_info(pm, 0, ®_offset, ®_data); reg_data = pm->dqm->wait_times; + break; + + case KFD_DEQUEUE_WAIT_SET_SCH_WAVE: + /* The CP cannot handle value 0 and it will result in + * an infinite grace period being set so set to 1 to prevent this. + */ + if (!value) + value = 1; + pm_build_dequeue_wait_counts_packet_info(pm, value, ®_offset, ®_data); + break; + default: + pr_err("Invalid dequeue wait cmd\n"); + return -EINVAL; + } packet = (struct pm4_mec_write_data_mmio *)buffer; memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); @@ -415,7 +446,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .set_resources = pm_set_resources_v9, .map_queues = pm_map_queues_v9, .unmap_queues = pm_unmap_queues_v9, - .set_grace_period = pm_set_grace_period_v9, + .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9, .query_status = pm_query_status_v9, .release_mem = NULL, .map_process_size = sizeof(struct pm4_mes_map_process), @@ -423,7 +454,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), - .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), + .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio), .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; @@ -434,7 +465,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { .set_resources = pm_set_resources_v9, .map_queues = pm_map_queues_v9, .unmap_queues = pm_unmap_queues_v9, - .set_grace_period = pm_set_grace_period_v9, + .config_dequeue_wait_counts = pm_config_dequeue_wait_counts_v9, .query_status = pm_query_status_v9, .release_mem = NULL, .map_process_size = sizeof(struct pm4_mes_map_process_aldebaran), @@ -442,7 +473,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), - .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), + .config_dequeue_wait_counts_size = sizeof(struct pm4_mec_write_data_mmio), .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c index 347c86e1c378..a1de5d7e173a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c @@ -304,7 +304,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = { .set_resources = pm_set_resources_vi, .map_queues = pm_map_queues_vi, .unmap_queues = pm_unmap_queues_vi, - .set_grace_period = NULL, + .config_dequeue_wait_counts = NULL, .query_status = pm_query_status_vi, .release_mem = pm_release_mem_vi, .map_process_size = sizeof(struct pm4_mes_map_process), @@ -312,7 +312,7 @@ const struct packet_manager_funcs kfd_vi_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), - .set_grace_period_size = 0, + .config_dequeue_wait_counts_size = 0, .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = sizeof(struct pm4_mec_release_mem) }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index bb09c873a9a5..f6aedf69c644 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1389,6 +1389,24 @@ int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm, #define KFD_FENCE_COMPLETED (100) #define KFD_FENCE_INIT (10) +/** + * enum kfd_config_dequeue_wait_counts_cmd - Command for configuring + * dequeue wait counts. + * + * @KFD_DEQUEUE_WAIT_INIT: Set optimized dequeue wait counts for a + * certain ASICs. For these ASICs, this is default value used by RESET + * @KFD_DEQUEUE_WAIT_RESET: Reset dequeue wait counts to the optimized value + * for certain ASICs. For others set it to default hardware reset value + * @KFD_DEQUEUE_WAIT_SET_SCH_WAVE: Set context switch latency wait + * + */ +enum kfd_config_dequeue_wait_counts_cmd { + KFD_DEQUEUE_WAIT_INIT = 1, + KFD_DEQUEUE_WAIT_RESET = 2, + KFD_DEQUEUE_WAIT_SET_SCH_WAVE = 3 +}; + + struct packet_manager { struct device_queue_manager *dqm; struct kernel_queue *priv_queue; @@ -1414,8 +1432,8 @@ struct packet_manager_funcs { int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, enum kfd_unmap_queues_filter mode, uint32_t filter_param, bool reset); - int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer, - uint32_t grace_period); + int (*config_dequeue_wait_counts)(struct packet_manager *pm, uint32_t *buffer, + enum kfd_config_dequeue_wait_counts_cmd cmd, uint32_t value); int (*query_status)(struct packet_manager *pm, uint32_t *buffer, uint64_t fence_address, uint64_t fence_value); int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); @@ -1426,7 +1444,7 @@ struct packet_manager_funcs { int set_resources_size; int map_queues_size; int unmap_queues_size; - int set_grace_period_size; + int config_dequeue_wait_counts_size; int query_status_size; int release_mem_size; }; @@ -1449,7 +1467,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, void pm_release_ib(struct packet_manager *pm); -int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period); +int pm_config_dequeue_wait_counts(struct packet_manager *pm, + enum kfd_config_dequeue_wait_counts_cmd cmd, + uint32_t wait_counts_config); /* Following PM funcs can be shared among VI and AI */ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); From 0d9cabc8f591ea1cd97c071b853b75b155c13259 Mon Sep 17 00:00:00 2001 From: Leon Huang Date: Tue, 11 Feb 2025 15:45:43 +0800 Subject: [PATCH 04/68] drm/amd/display: Fix incorrect DPCD configs while Replay/PSR switch [Why] When switching between PSR/Replay, the DPCD config of previous mode is not cleared, resulting in unexpected behavior in TCON. [How] Initialize the DPCD in setup function Reviewed-by: Robin Chen Signed-off-by: Leon Huang Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../link/protocols/link_edp_panel_control.c | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index e0e3bb865359..1e4adbc764ea 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -675,6 +675,18 @@ bool edp_setup_psr(struct dc_link *link, if (!link) return false; + //Clear PSR cfg + memset(&psr_configuration, 0, sizeof(psr_configuration)); + dm_helpers_dp_write_dpcd( + link->ctx, + link, + DP_PSR_EN_CFG, + &psr_configuration.raw, + sizeof(psr_configuration.raw)); + + if (link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) + return false; + dc = link->ctx->dc; dmcu = dc->res_pool->dmcu; psr = dc->res_pool->psr; @@ -685,9 +697,6 @@ bool edp_setup_psr(struct dc_link *link, if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) return false; - - memset(&psr_configuration, 0, sizeof(psr_configuration)); - psr_configuration.bits.ENABLE = 1; psr_configuration.bits.CRC_VERIFICATION = 1; psr_configuration.bits.FRAME_CAPTURE_INDICATION = @@ -950,6 +959,16 @@ bool edp_setup_replay(struct dc_link *link, const struct dc_stream_state *stream if (!link) return false; + //Clear Replay config + dm_helpers_dp_write_dpcd(link->ctx, link, + DP_SINK_PR_ENABLE_AND_CONFIGURATION, + (uint8_t *)&(replay_config.raw), sizeof(uint8_t)); + + if (!(link->replay_settings.config.replay_supported)) + return false; + + link->replay_settings.config.replay_error_status.raw = 0; + dc = link->ctx->dc; replay = dc->res_pool->replay; From 7b1ba19eb15f88e70782642ce2d934211269337b Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 20 Feb 2025 16:20:26 -0500 Subject: [PATCH 05/68] drm/amd/display: Disable unneeded hpd interrupts during dm_init [Why] It seems HPD interrupts are enabled by default for all connectors, even if the hpd source isn't valid. An eDP for example, does not have a valid hpd source (but does have a valid hpdrx source; see construct_phy()). Thus, eDPs should have their hpd interrupt disabled. In the past, this wasn't really an issue. Although the driver gets interrupted, then acks by writing to hw registers, there weren't any subscribed handlers that did anything meaningful (see register_hpd_handlers()). But things changed with the introduction of IPS. s2idle requires that the driver allows IPS for DMUB fw to put hw to sleep. Since register access requires hw to be awake, the driver will block IPS entry to do so. And no IPS means no hw sleep during s2idle. This was the observation on DCN35 systems with an eDP. During suspend, the eDP toggled its hpd pin as part of the panel power down sequence. The driver was then interrupted, and acked by writing to registers, blocking IPS entry. [How] Since DC marks eDP connections as having invalid hpd sources (see construct_phy()), DM should disable them at the hw level. Do so in amdgpu_dm_hpd_init() by disabling all hpd ints first, then selectively enabling ones for connectors that have valid hpd sources. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Leo Li Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 64 +++++++++++++------ 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 2b63cbab0e87..b61e210f6246 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -890,8 +890,16 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; + int irq_type; int i; + /* First, clear all hpd and hpdrx interrupts */ + for (i = DC_IRQ_SOURCE_HPD1; i <= DC_IRQ_SOURCE_HPD6RX; i++) { + if (!dc_interrupt_set(adev->dm.dc, i, false)) + drm_err(dev, "Failed to clear hpd(rx) source=%d on init\n", + i); + } + drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { struct amdgpu_dm_connector *amdgpu_dm_connector; @@ -904,10 +912,31 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) dc_link = amdgpu_dm_connector->dc_link; + /* + * Get a base driver irq reference for hpd ints for the lifetime + * of dm. Note that only hpd interrupt types are registered with + * base driver; hpd_rx types aren't. IOW, amdgpu_irq_get/put on + * hpd_rx isn't available. DM currently controls hpd_rx + * explicitly with dc_interrupt_set() + */ if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { - dc_interrupt_set(adev->dm.dc, - dc_link->irq_source_hpd, - true); + irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1; + /* + * TODO: There's a mismatch between mode_info.num_hpd + * and what bios reports as the # of connectors with hpd + * sources. Since the # of hpd source types registered + * with base driver == mode_info.num_hpd, we have to + * fallback to dc_interrupt_set for the remaining types. + */ + if (irq_type < adev->mode_info.num_hpd) { + if (amdgpu_irq_get(adev, &adev->hpd_irq, irq_type)) + drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n", + dc_link->irq_source_hpd); + } else { + dc_interrupt_set(adev->dm.dc, + dc_link->irq_source_hpd, + true); + } } if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) { @@ -917,12 +946,6 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); - - /* Update reference counts for HPDs */ - for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { - if (amdgpu_irq_get(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) - drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n", i); - } } /** @@ -938,7 +961,7 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; - int i; + int irq_type; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -952,9 +975,18 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) dc_link = amdgpu_dm_connector->dc_link; if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) { - dc_interrupt_set(adev->dm.dc, - dc_link->irq_source_hpd, - false); + irq_type = dc_link->irq_source_hpd - DC_IRQ_SOURCE_HPD1; + + /* TODO: See same TODO in amdgpu_dm_hpd_init() */ + if (irq_type < adev->mode_info.num_hpd) { + if (amdgpu_irq_put(adev, &adev->hpd_irq, irq_type)) + drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n", + dc_link->irq_source_hpd); + } else { + dc_interrupt_set(adev->dm.dc, + dc_link->irq_source_hpd, + false); + } } if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) { @@ -964,10 +996,4 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); - - /* Update reference counts for HPDs */ - for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { - if (amdgpu_irq_put(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) - drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n", i); - } } From d93b92c976671bf3352ad808c3783d37f62e9a0a Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Tue, 25 Feb 2025 16:17:25 -0500 Subject: [PATCH 06/68] drm/amd/display: Add more debug data to dmub_srv [Why] When analyzing some crash dumps, not all of the expected DMUB info was available, so we want to add in-object storage for this data. [How] - dmub_srv_debug (renamed to dmub_timeout_info) is already a member of dmub_diagnostic_data, therefore keep a dmub_diagnostic_data directly in dmub_srv - use dmub_srv->debug when collecting diagnostic info instead of stack object to allow for easy inspection in crash dumps Reviewed-by: Alvin Lee Signed-off-by: Joshua Aberback Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 89 +++++++++-------- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 2 +- drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 10 +- .../gpu/drm/amd/display/dmub/src/dmub_dcn20.c | 85 ++++++++-------- .../gpu/drm/amd/display/dmub/src/dmub_dcn20.h | 2 +- .../gpu/drm/amd/display/dmub/src/dmub_dcn31.c | 93 +++++++++--------- .../gpu/drm/amd/display/dmub/src/dmub_dcn31.h | 2 +- .../gpu/drm/amd/display/dmub/src/dmub_dcn32.c | 98 ++++++++++--------- .../gpu/drm/amd/display/dmub/src/dmub_dcn32.h | 2 +- .../gpu/drm/amd/display/dmub/src/dmub_dcn35.c | 93 +++++++++--------- .../gpu/drm/amd/display/dmub/src/dmub_dcn35.h | 2 +- .../drm/amd/display/dmub/src/dmub_dcn401.c | 97 +++++++++--------- .../drm/amd/display/dmub/src/dmub_dcn401.h | 2 +- .../gpu/drm/amd/display/dmub/src/dmub_srv.c | 6 +- 14 files changed, 298 insertions(+), 285 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 87b4c2793df3..614e03bfd598 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -200,10 +200,10 @@ bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, if (status != DMUB_STATUS_OK) { DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); - if (!dmub->debug.timeout_occured) { - dmub->debug.timeout_occured = true; - dmub->debug.timeout_cmd = *cmd_list; - dmub->debug.timestamp = dm_get_timestamp(dc_dmub_srv->ctx); + if (!dmub->debug.timeout_info.timeout_occured) { + dmub->debug.timeout_info.timeout_occured = true; + dmub->debug.timeout_info.timeout_cmd = *cmd_list; + dmub->debug.timeout_info.timestamp = dm_get_timestamp(dc_dmub_srv->ctx); } dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); return false; @@ -927,16 +927,15 @@ void dc_dmub_setup_subvp_dmub_command(struct dc *dc, dc_wake_and_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } -bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *diag_data) +bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv) { - if (!dc_dmub_srv || !dc_dmub_srv->dmub || !diag_data) + if (!dc_dmub_srv || !dc_dmub_srv->dmub) return false; - return dmub_srv_get_diagnostic_data(dc_dmub_srv->dmub, diag_data); + return dmub_srv_get_diagnostic_data(dc_dmub_srv->dmub); } void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv) { - struct dmub_diagnostic_data diag_data = {0}; uint32_t i; if (!dc_dmub_srv || !dc_dmub_srv->dmub) { @@ -946,49 +945,49 @@ void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv) DC_LOG_ERROR("%s: DMCUB error - collecting diagnostic data\n", __func__); - if (!dc_dmub_srv_get_diagnostic_data(dc_dmub_srv, &diag_data)) { + if (!dc_dmub_srv_get_diagnostic_data(dc_dmub_srv)) { DC_LOG_ERROR("%s: dc_dmub_srv_get_diagnostic_data failed.", __func__); return; } DC_LOG_DEBUG("DMCUB STATE:"); - DC_LOG_DEBUG(" dmcub_version : %08x", diag_data.dmcub_version); - DC_LOG_DEBUG(" scratch [0] : %08x", diag_data.scratch[0]); - DC_LOG_DEBUG(" scratch [1] : %08x", diag_data.scratch[1]); - DC_LOG_DEBUG(" scratch [2] : %08x", diag_data.scratch[2]); - DC_LOG_DEBUG(" scratch [3] : %08x", diag_data.scratch[3]); - DC_LOG_DEBUG(" scratch [4] : %08x", diag_data.scratch[4]); - DC_LOG_DEBUG(" scratch [5] : %08x", diag_data.scratch[5]); - DC_LOG_DEBUG(" scratch [6] : %08x", diag_data.scratch[6]); - DC_LOG_DEBUG(" scratch [7] : %08x", diag_data.scratch[7]); - DC_LOG_DEBUG(" scratch [8] : %08x", diag_data.scratch[8]); - DC_LOG_DEBUG(" scratch [9] : %08x", diag_data.scratch[9]); - DC_LOG_DEBUG(" scratch [10] : %08x", diag_data.scratch[10]); - DC_LOG_DEBUG(" scratch [11] : %08x", diag_data.scratch[11]); - DC_LOG_DEBUG(" scratch [12] : %08x", diag_data.scratch[12]); - DC_LOG_DEBUG(" scratch [13] : %08x", diag_data.scratch[13]); - DC_LOG_DEBUG(" scratch [14] : %08x", diag_data.scratch[14]); - DC_LOG_DEBUG(" scratch [15] : %08x", diag_data.scratch[15]); + DC_LOG_DEBUG(" dmcub_version : %08x", dc_dmub_srv->dmub->debug.dmcub_version); + DC_LOG_DEBUG(" scratch [0] : %08x", dc_dmub_srv->dmub->debug.scratch[0]); + DC_LOG_DEBUG(" scratch [1] : %08x", dc_dmub_srv->dmub->debug.scratch[1]); + DC_LOG_DEBUG(" scratch [2] : %08x", dc_dmub_srv->dmub->debug.scratch[2]); + DC_LOG_DEBUG(" scratch [3] : %08x", dc_dmub_srv->dmub->debug.scratch[3]); + DC_LOG_DEBUG(" scratch [4] : %08x", dc_dmub_srv->dmub->debug.scratch[4]); + DC_LOG_DEBUG(" scratch [5] : %08x", dc_dmub_srv->dmub->debug.scratch[5]); + DC_LOG_DEBUG(" scratch [6] : %08x", dc_dmub_srv->dmub->debug.scratch[6]); + DC_LOG_DEBUG(" scratch [7] : %08x", dc_dmub_srv->dmub->debug.scratch[7]); + DC_LOG_DEBUG(" scratch [8] : %08x", dc_dmub_srv->dmub->debug.scratch[8]); + DC_LOG_DEBUG(" scratch [9] : %08x", dc_dmub_srv->dmub->debug.scratch[9]); + DC_LOG_DEBUG(" scratch [10] : %08x", dc_dmub_srv->dmub->debug.scratch[10]); + DC_LOG_DEBUG(" scratch [11] : %08x", dc_dmub_srv->dmub->debug.scratch[11]); + DC_LOG_DEBUG(" scratch [12] : %08x", dc_dmub_srv->dmub->debug.scratch[12]); + DC_LOG_DEBUG(" scratch [13] : %08x", dc_dmub_srv->dmub->debug.scratch[13]); + DC_LOG_DEBUG(" scratch [14] : %08x", dc_dmub_srv->dmub->debug.scratch[14]); + DC_LOG_DEBUG(" scratch [15] : %08x", dc_dmub_srv->dmub->debug.scratch[15]); for (i = 0; i < DMUB_PC_SNAPSHOT_COUNT; i++) - DC_LOG_DEBUG(" pc[%d] : %08x", i, diag_data.pc[i]); - DC_LOG_DEBUG(" unk_fault_addr : %08x", diag_data.undefined_address_fault_addr); - DC_LOG_DEBUG(" inst_fault_addr : %08x", diag_data.inst_fetch_fault_addr); - DC_LOG_DEBUG(" data_fault_addr : %08x", diag_data.data_write_fault_addr); - DC_LOG_DEBUG(" inbox1_rptr : %08x", diag_data.inbox1_rptr); - DC_LOG_DEBUG(" inbox1_wptr : %08x", diag_data.inbox1_wptr); - DC_LOG_DEBUG(" inbox1_size : %08x", diag_data.inbox1_size); - DC_LOG_DEBUG(" inbox0_rptr : %08x", diag_data.inbox0_rptr); - DC_LOG_DEBUG(" inbox0_wptr : %08x", diag_data.inbox0_wptr); - DC_LOG_DEBUG(" inbox0_size : %08x", diag_data.inbox0_size); - DC_LOG_DEBUG(" outbox1_rptr : %08x", diag_data.outbox1_rptr); - DC_LOG_DEBUG(" outbox1_wptr : %08x", diag_data.outbox1_wptr); - DC_LOG_DEBUG(" outbox1_size : %08x", diag_data.outbox1_size); - DC_LOG_DEBUG(" is_enabled : %d", diag_data.is_dmcub_enabled); - DC_LOG_DEBUG(" is_soft_reset : %d", diag_data.is_dmcub_soft_reset); - DC_LOG_DEBUG(" is_secure_reset : %d", diag_data.is_dmcub_secure_reset); - DC_LOG_DEBUG(" is_traceport_en : %d", diag_data.is_traceport_en); - DC_LOG_DEBUG(" is_cw0_en : %d", diag_data.is_cw0_enabled); - DC_LOG_DEBUG(" is_cw6_en : %d", diag_data.is_cw6_enabled); + DC_LOG_DEBUG(" pc[%d] : %08x", i, dc_dmub_srv->dmub->debug.pc[i]); + DC_LOG_DEBUG(" unk_fault_addr : %08x", dc_dmub_srv->dmub->debug.undefined_address_fault_addr); + DC_LOG_DEBUG(" inst_fault_addr : %08x", dc_dmub_srv->dmub->debug.inst_fetch_fault_addr); + DC_LOG_DEBUG(" data_fault_addr : %08x", dc_dmub_srv->dmub->debug.data_write_fault_addr); + DC_LOG_DEBUG(" inbox1_rptr : %08x", dc_dmub_srv->dmub->debug.inbox1_rptr); + DC_LOG_DEBUG(" inbox1_wptr : %08x", dc_dmub_srv->dmub->debug.inbox1_wptr); + DC_LOG_DEBUG(" inbox1_size : %08x", dc_dmub_srv->dmub->debug.inbox1_size); + DC_LOG_DEBUG(" inbox0_rptr : %08x", dc_dmub_srv->dmub->debug.inbox0_rptr); + DC_LOG_DEBUG(" inbox0_wptr : %08x", dc_dmub_srv->dmub->debug.inbox0_wptr); + DC_LOG_DEBUG(" inbox0_size : %08x", dc_dmub_srv->dmub->debug.inbox0_size); + DC_LOG_DEBUG(" outbox1_rptr : %08x", dc_dmub_srv->dmub->debug.outbox1_rptr); + DC_LOG_DEBUG(" outbox1_wptr : %08x", dc_dmub_srv->dmub->debug.outbox1_wptr); + DC_LOG_DEBUG(" outbox1_size : %08x", dc_dmub_srv->dmub->debug.outbox1_size); + DC_LOG_DEBUG(" is_enabled : %d", dc_dmub_srv->dmub->debug.is_dmcub_enabled); + DC_LOG_DEBUG(" is_soft_reset : %d", dc_dmub_srv->dmub->debug.is_dmcub_soft_reset); + DC_LOG_DEBUG(" is_secure_reset : %d", dc_dmub_srv->dmub->debug.is_dmcub_secure_reset); + DC_LOG_DEBUG(" is_traceport_en : %d", dc_dmub_srv->dmub->debug.is_traceport_en); + DC_LOG_DEBUG(" is_cw0_en : %d", dc_dmub_srv->dmub->debug.is_cw0_enabled); + DC_LOG_DEBUG(" is_cw6_en : %d", dc_dmub_srv->dmub->debug.is_cw6_enabled); } static bool dc_dmub_should_update_cursor_data(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 10b48198b7a6..a636f4c3f01d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -94,7 +94,7 @@ void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dmub_srv); void dc_dmub_srv_wait_for_inbox0_ack(struct dc_dmub_srv *dmub_srv); void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_data_register data); -bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *dmub_oca); +bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv); void dc_dmub_setup_subvp_dmub_command(struct dc *dc, struct dc_state *context, bool enable); void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 0787cc3904d6..8ebeb34021e5 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -313,7 +313,7 @@ struct dmub_srv_hw_params { * @timeout_occured: Indicates a timeout occured on any message from driver to dmub * @timeout_cmd: first cmd sent from driver that timed out - subsequent timeouts are not stored */ -struct dmub_srv_debug { +struct dmub_timeout_info { bool timeout_occured; union dmub_rb_cmd timeout_cmd; unsigned long long timestamp; @@ -340,7 +340,7 @@ struct dmub_diagnostic_data { uint32_t outbox1_wptr; uint32_t outbox1_size; uint32_t gpint_datain0; - struct dmub_srv_debug timeout_info; + struct dmub_timeout_info timeout_info; uint8_t is_dmcub_enabled : 1; uint8_t is_dmcub_soft_reset : 1; uint8_t is_dmcub_secure_reset : 1; @@ -456,7 +456,7 @@ struct dmub_srv_hw_funcs { void (*send_inbox0_cmd)(struct dmub_srv *dmub, union dmub_inbox0_data_register data); uint32_t (*get_current_time)(struct dmub_srv *dmub); - void (*get_diagnostic_data)(struct dmub_srv *dmub, struct dmub_diagnostic_data *dmub_oca); + void (*get_diagnostic_data)(struct dmub_srv *dmub); bool (*should_detect)(struct dmub_srv *dmub); void (*init_reg_offsets)(struct dmub_srv *dmub, struct dc_context *ctx); @@ -545,7 +545,7 @@ struct dmub_srv { struct dmub_visual_confirm_color visual_confirm_color; enum dmub_srv_power_state_type power_state; - struct dmub_srv_debug debug; + struct dmub_diagnostic_data debug; }; /** @@ -901,7 +901,7 @@ enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, bool dmub_srv_get_outbox0_msg(struct dmub_srv *dmub, struct dmcub_trace_buf_entry *entry); -bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data); +bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub); bool dmub_srv_should_detect(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c index e500ca9ae09c..73888c1bea93 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c @@ -414,63 +414,66 @@ uint32_t dmub_dcn20_get_current_time(struct dmub_srv *dmub) return REG_READ(DMCUB_TIMER_CURRENT); } -void dmub_dcn20_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) +void dmub_dcn20_get_diagnostic_data(struct dmub_srv *dmub) { uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; + struct dmub_timeout_info timeout = {0}; - if (!dmub || !diag_data) + if (!dmub) return; - memset(diag_data, 0, sizeof(*diag_data)); - - diag_data->dmcub_version = dmub->fw_version; - - diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0); - diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1); - diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2); - diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3); - diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4); - diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5); - diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6); - diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7); - diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8); - diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9); - diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10); - diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11); - diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12); - diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13); - diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14); - diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15); - - diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); - diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); - diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); - - diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); - diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); - diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); - - diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); - diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); - diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + /* timeout data filled externally, cache before resetting memory */ + timeout = dmub->debug.timeout_info; + memset(&dmub->debug, 0, sizeof(dmub->debug)); + dmub->debug.timeout_info = timeout; + + dmub->debug.dmcub_version = dmub->fw_version; + + dmub->debug.scratch[0] = REG_READ(DMCUB_SCRATCH0); + dmub->debug.scratch[1] = REG_READ(DMCUB_SCRATCH1); + dmub->debug.scratch[2] = REG_READ(DMCUB_SCRATCH2); + dmub->debug.scratch[3] = REG_READ(DMCUB_SCRATCH3); + dmub->debug.scratch[4] = REG_READ(DMCUB_SCRATCH4); + dmub->debug.scratch[5] = REG_READ(DMCUB_SCRATCH5); + dmub->debug.scratch[6] = REG_READ(DMCUB_SCRATCH6); + dmub->debug.scratch[7] = REG_READ(DMCUB_SCRATCH7); + dmub->debug.scratch[8] = REG_READ(DMCUB_SCRATCH8); + dmub->debug.scratch[9] = REG_READ(DMCUB_SCRATCH9); + dmub->debug.scratch[10] = REG_READ(DMCUB_SCRATCH10); + dmub->debug.scratch[11] = REG_READ(DMCUB_SCRATCH11); + dmub->debug.scratch[12] = REG_READ(DMCUB_SCRATCH12); + dmub->debug.scratch[13] = REG_READ(DMCUB_SCRATCH13); + dmub->debug.scratch[14] = REG_READ(DMCUB_SCRATCH14); + dmub->debug.scratch[15] = REG_READ(DMCUB_SCRATCH15); + + dmub->debug.undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); + dmub->debug.inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); + dmub->debug.data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); + + dmub->debug.inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); + dmub->debug.inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); + dmub->debug.inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); + + dmub->debug.inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); + dmub->debug.inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); + dmub->debug.inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); - diag_data->is_dmcub_enabled = is_dmub_enabled; + dmub->debug.is_dmcub_enabled = is_dmub_enabled; REG_GET(DMCUB_CNTL, DMCUB_SOFT_RESET, &is_soft_reset); - diag_data->is_dmcub_soft_reset = is_soft_reset; + dmub->debug.is_dmcub_soft_reset = is_soft_reset; REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset); - diag_data->is_dmcub_secure_reset = is_sec_reset; + dmub->debug.is_dmcub_secure_reset = is_sec_reset; REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); - diag_data->is_traceport_en = is_traceport_enabled; + dmub->debug.is_traceport_en = is_traceport_enabled; REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled); - diag_data->is_cw0_enabled = is_cw0_enabled; + dmub->debug.is_cw0_enabled = is_cw0_enabled; REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled); - diag_data->is_cw6_enabled = is_cw6_enabled; - diag_data->timeout_info = dmub->debug; + dmub->debug.is_cw6_enabled = is_cw6_enabled; } diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h index de287b101848..42c1fb4bc73f 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h @@ -247,6 +247,6 @@ bool dmub_dcn20_use_cached_trace_buffer(struct dmub_srv *dmub); uint32_t dmub_dcn20_get_current_time(struct dmub_srv *dmub); -void dmub_dcn20_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *dmub_oca); +void dmub_dcn20_get_diagnostic_data(struct dmub_srv *dmub); #endif /* _DMUB_DCN20_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c index 9796077885c9..a308bd604677 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.c @@ -414,69 +414,72 @@ uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub) return REG_READ(DMCUB_TIMER_CURRENT); } -void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) +void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub) { uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; + struct dmub_timeout_info timeout = {0}; - if (!dmub || !diag_data) + if (!dmub) return; - memset(diag_data, 0, sizeof(*diag_data)); - - diag_data->dmcub_version = dmub->fw_version; - - diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0); - diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1); - diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2); - diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3); - diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4); - diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5); - diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6); - diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7); - diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8); - diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9); - diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10); - diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11); - diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12); - diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13); - diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14); - diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15); - - diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); - diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); - diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); - - diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); - diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); - diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); - - diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); - diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); - diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); - - diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); - diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); - diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + /* timeout data filled externally, cache before resetting memory */ + timeout = dmub->debug.timeout_info; + memset(&dmub->debug, 0, sizeof(dmub->debug)); + dmub->debug.timeout_info = timeout; + + dmub->debug.dmcub_version = dmub->fw_version; + + dmub->debug.scratch[0] = REG_READ(DMCUB_SCRATCH0); + dmub->debug.scratch[1] = REG_READ(DMCUB_SCRATCH1); + dmub->debug.scratch[2] = REG_READ(DMCUB_SCRATCH2); + dmub->debug.scratch[3] = REG_READ(DMCUB_SCRATCH3); + dmub->debug.scratch[4] = REG_READ(DMCUB_SCRATCH4); + dmub->debug.scratch[5] = REG_READ(DMCUB_SCRATCH5); + dmub->debug.scratch[6] = REG_READ(DMCUB_SCRATCH6); + dmub->debug.scratch[7] = REG_READ(DMCUB_SCRATCH7); + dmub->debug.scratch[8] = REG_READ(DMCUB_SCRATCH8); + dmub->debug.scratch[9] = REG_READ(DMCUB_SCRATCH9); + dmub->debug.scratch[10] = REG_READ(DMCUB_SCRATCH10); + dmub->debug.scratch[11] = REG_READ(DMCUB_SCRATCH11); + dmub->debug.scratch[12] = REG_READ(DMCUB_SCRATCH12); + dmub->debug.scratch[13] = REG_READ(DMCUB_SCRATCH13); + dmub->debug.scratch[14] = REG_READ(DMCUB_SCRATCH14); + dmub->debug.scratch[15] = REG_READ(DMCUB_SCRATCH15); + + dmub->debug.undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); + dmub->debug.inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); + dmub->debug.data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); + + dmub->debug.inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); + dmub->debug.inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); + dmub->debug.inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); + + dmub->debug.inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); + dmub->debug.inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); + dmub->debug.inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + + dmub->debug.outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + dmub->debug.outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + dmub->debug.outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); - diag_data->is_dmcub_enabled = is_dmub_enabled; + dmub->debug.is_dmcub_enabled = is_dmub_enabled; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); - diag_data->is_dmcub_soft_reset = is_soft_reset; + dmub->debug.is_dmcub_soft_reset = is_soft_reset; REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset); - diag_data->is_dmcub_secure_reset = is_sec_reset; + dmub->debug.is_dmcub_secure_reset = is_sec_reset; REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); - diag_data->is_traceport_en = is_traceport_enabled; + dmub->debug.is_traceport_en = is_traceport_enabled; REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled); - diag_data->is_cw0_enabled = is_cw0_enabled; + dmub->debug.is_cw0_enabled = is_cw0_enabled; REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled); - diag_data->is_cw6_enabled = is_cw6_enabled; - diag_data->timeout_info = dmub->debug; + dmub->debug.is_cw6_enabled = is_cw6_enabled; } bool dmub_dcn31_should_detect(struct dmub_srv *dmub) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h index eccdab4986ce..1c43ef2bca66 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn31.h @@ -251,7 +251,7 @@ void dmub_dcn31_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset); uint32_t dmub_dcn31_get_current_time(struct dmub_srv *dmub); -void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data); +void dmub_dcn31_get_diagnostic_data(struct dmub_srv *dmub); bool dmub_dcn31_should_detect(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c index 9600b7f858b0..e7056205b050 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.c @@ -417,73 +417,75 @@ uint32_t dmub_dcn32_get_current_time(struct dmub_srv *dmub) return REG_READ(DMCUB_TIMER_CURRENT); } -void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) +void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub) { uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; + struct dmub_timeout_info timeout = {0}; - if (!dmub || !diag_data) + if (!dmub) return; - memset(diag_data, 0, sizeof(*diag_data)); - - diag_data->dmcub_version = dmub->fw_version; - - diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0); - diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1); - diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2); - diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3); - diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4); - diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5); - diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6); - diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7); - diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8); - diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9); - diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10); - diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11); - diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12); - diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13); - diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14); - diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15); - diag_data->scratch[16] = REG_READ(DMCUB_SCRATCH16); - - diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); - diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); - diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); - - diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); - diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); - diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); - - diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); - diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); - diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); - - diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); - diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); - diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + /* timeout data filled externally, cache before resetting memory */ + timeout = dmub->debug.timeout_info; + memset(&dmub->debug, 0, sizeof(dmub->debug)); + dmub->debug.timeout_info = timeout; + + dmub->debug.dmcub_version = dmub->fw_version; + + dmub->debug.scratch[0] = REG_READ(DMCUB_SCRATCH0); + dmub->debug.scratch[1] = REG_READ(DMCUB_SCRATCH1); + dmub->debug.scratch[2] = REG_READ(DMCUB_SCRATCH2); + dmub->debug.scratch[3] = REG_READ(DMCUB_SCRATCH3); + dmub->debug.scratch[4] = REG_READ(DMCUB_SCRATCH4); + dmub->debug.scratch[5] = REG_READ(DMCUB_SCRATCH5); + dmub->debug.scratch[6] = REG_READ(DMCUB_SCRATCH6); + dmub->debug.scratch[7] = REG_READ(DMCUB_SCRATCH7); + dmub->debug.scratch[8] = REG_READ(DMCUB_SCRATCH8); + dmub->debug.scratch[9] = REG_READ(DMCUB_SCRATCH9); + dmub->debug.scratch[10] = REG_READ(DMCUB_SCRATCH10); + dmub->debug.scratch[11] = REG_READ(DMCUB_SCRATCH11); + dmub->debug.scratch[12] = REG_READ(DMCUB_SCRATCH12); + dmub->debug.scratch[13] = REG_READ(DMCUB_SCRATCH13); + dmub->debug.scratch[14] = REG_READ(DMCUB_SCRATCH14); + dmub->debug.scratch[15] = REG_READ(DMCUB_SCRATCH15); + dmub->debug.scratch[16] = REG_READ(DMCUB_SCRATCH16); + + dmub->debug.undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); + dmub->debug.inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); + dmub->debug.data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); + + dmub->debug.inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); + dmub->debug.inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); + dmub->debug.inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); + + dmub->debug.inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); + dmub->debug.inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); + dmub->debug.inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + + dmub->debug.outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + dmub->debug.outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + dmub->debug.outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); - diag_data->is_dmcub_enabled = is_dmub_enabled; + dmub->debug.is_dmcub_enabled = is_dmub_enabled; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); - diag_data->is_dmcub_soft_reset = is_soft_reset; + dmub->debug.is_dmcub_soft_reset = is_soft_reset; REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset); - diag_data->is_dmcub_secure_reset = is_sec_reset; + dmub->debug.is_dmcub_secure_reset = is_sec_reset; REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); - diag_data->is_traceport_en = is_traceport_enabled; + dmub->debug.is_traceport_en = is_traceport_enabled; REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled); - diag_data->is_cw0_enabled = is_cw0_enabled; + dmub->debug.is_cw0_enabled = is_cw0_enabled; REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled); - diag_data->is_cw6_enabled = is_cw6_enabled; + dmub->debug.is_cw6_enabled = is_cw6_enabled; - diag_data->gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0); - - diag_data->timeout_info = dmub->debug; + dmub->debug.gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0); } void dmub_dcn32_configure_dmub_in_system_memory(struct dmub_srv *dmub) { diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h index 29c1132951af..1a229450c53d 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn32.h @@ -254,7 +254,7 @@ void dmub_dcn32_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset); uint32_t dmub_dcn32_get_current_time(struct dmub_srv *dmub); -void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data); +void dmub_dcn32_get_diagnostic_data(struct dmub_srv *dmub); void dmub_dcn32_configure_dmub_in_system_memory(struct dmub_srv *dmub); void dmub_dcn32_send_inbox0_cmd(struct dmub_srv *dmub, union dmub_inbox0_data_register data); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 01d013a12b94..72a0f078cd1a 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -462,66 +462,69 @@ uint32_t dmub_dcn35_get_current_time(struct dmub_srv *dmub) return REG_READ(DMCUB_TIMER_CURRENT); } -void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) +void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub) { uint32_t is_dmub_enabled, is_soft_reset; uint32_t is_traceport_enabled, is_cw6_enabled; + struct dmub_timeout_info timeout = {0}; - if (!dmub || !diag_data) + if (!dmub) return; - memset(diag_data, 0, sizeof(*diag_data)); - - diag_data->dmcub_version = dmub->fw_version; - - diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0); - diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1); - diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2); - diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3); - diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4); - diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5); - diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6); - diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7); - diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8); - diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9); - diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10); - diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11); - diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12); - diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13); - diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14); - diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15); - diag_data->scratch[16] = REG_READ(DMCUB_SCRATCH16); - - diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); - diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); - diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); - - diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); - diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); - diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); - - diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); - diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); - diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); - - diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); - diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); - diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + /* timeout data filled externally, cache before resetting memory */ + timeout = dmub->debug.timeout_info; + memset(&dmub->debug, 0, sizeof(dmub->debug)); + dmub->debug.timeout_info = timeout; + + dmub->debug.dmcub_version = dmub->fw_version; + + dmub->debug.scratch[0] = REG_READ(DMCUB_SCRATCH0); + dmub->debug.scratch[1] = REG_READ(DMCUB_SCRATCH1); + dmub->debug.scratch[2] = REG_READ(DMCUB_SCRATCH2); + dmub->debug.scratch[3] = REG_READ(DMCUB_SCRATCH3); + dmub->debug.scratch[4] = REG_READ(DMCUB_SCRATCH4); + dmub->debug.scratch[5] = REG_READ(DMCUB_SCRATCH5); + dmub->debug.scratch[6] = REG_READ(DMCUB_SCRATCH6); + dmub->debug.scratch[7] = REG_READ(DMCUB_SCRATCH7); + dmub->debug.scratch[8] = REG_READ(DMCUB_SCRATCH8); + dmub->debug.scratch[9] = REG_READ(DMCUB_SCRATCH9); + dmub->debug.scratch[10] = REG_READ(DMCUB_SCRATCH10); + dmub->debug.scratch[11] = REG_READ(DMCUB_SCRATCH11); + dmub->debug.scratch[12] = REG_READ(DMCUB_SCRATCH12); + dmub->debug.scratch[13] = REG_READ(DMCUB_SCRATCH13); + dmub->debug.scratch[14] = REG_READ(DMCUB_SCRATCH14); + dmub->debug.scratch[15] = REG_READ(DMCUB_SCRATCH15); + dmub->debug.scratch[16] = REG_READ(DMCUB_SCRATCH16); + + dmub->debug.undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); + dmub->debug.inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); + dmub->debug.data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); + + dmub->debug.inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); + dmub->debug.inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); + dmub->debug.inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); + + dmub->debug.inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); + dmub->debug.inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); + dmub->debug.inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + + dmub->debug.outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + dmub->debug.outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + dmub->debug.outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); - diag_data->is_dmcub_enabled = is_dmub_enabled; + dmub->debug.is_dmcub_enabled = is_dmub_enabled; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); - diag_data->is_dmcub_soft_reset = is_soft_reset; + dmub->debug.is_dmcub_soft_reset = is_soft_reset; REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); - diag_data->is_traceport_en = is_traceport_enabled; + dmub->debug.is_traceport_en = is_traceport_enabled; REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled); - diag_data->is_cw6_enabled = is_cw6_enabled; + dmub->debug.is_cw6_enabled = is_cw6_enabled; - diag_data->gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0); - diag_data->timeout_info = dmub->debug; + dmub->debug.gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0); } void dmub_dcn35_configure_dmub_in_system_memory(struct dmub_srv *dmub) { diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h index 686e97c00ccc..39fcb7275da5 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.h @@ -269,7 +269,7 @@ void dmub_dcn35_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset); uint32_t dmub_dcn35_get_current_time(struct dmub_srv *dmub); -void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data); +void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub); void dmub_dcn35_configure_dmub_in_system_memory(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index e1c4fe1c6e3e..e67f7c4784eb 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -415,72 +415,75 @@ uint32_t dmub_dcn401_get_current_time(struct dmub_srv *dmub) return REG_READ(DMCUB_TIMER_CURRENT); } -void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) +void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub) { uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; + struct dmub_timeout_info timeout = {0}; - if (!dmub || !diag_data) + if (!dmub) return; - memset(diag_data, 0, sizeof(*diag_data)); - - diag_data->dmcub_version = dmub->fw_version; - - diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0); - diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1); - diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2); - diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3); - diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4); - diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5); - diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6); - diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7); - diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8); - diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9); - diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10); - diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11); - diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12); - diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13); - diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14); - diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15); - diag_data->scratch[16] = REG_READ(DMCUB_SCRATCH16); - - diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); - diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); - diag_data->data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); - - diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); - diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); - diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); - - diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); - diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); - diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); - - diag_data->outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); - diag_data->outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); - diag_data->outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); + /* timeout data filled externally, cache before resetting memory */ + timeout = dmub->debug.timeout_info; + memset(&dmub->debug, 0, sizeof(dmub->debug)); + dmub->debug.timeout_info = timeout; + + dmub->debug.dmcub_version = dmub->fw_version; + + dmub->debug.scratch[0] = REG_READ(DMCUB_SCRATCH0); + dmub->debug.scratch[1] = REG_READ(DMCUB_SCRATCH1); + dmub->debug.scratch[2] = REG_READ(DMCUB_SCRATCH2); + dmub->debug.scratch[3] = REG_READ(DMCUB_SCRATCH3); + dmub->debug.scratch[4] = REG_READ(DMCUB_SCRATCH4); + dmub->debug.scratch[5] = REG_READ(DMCUB_SCRATCH5); + dmub->debug.scratch[6] = REG_READ(DMCUB_SCRATCH6); + dmub->debug.scratch[7] = REG_READ(DMCUB_SCRATCH7); + dmub->debug.scratch[8] = REG_READ(DMCUB_SCRATCH8); + dmub->debug.scratch[9] = REG_READ(DMCUB_SCRATCH9); + dmub->debug.scratch[10] = REG_READ(DMCUB_SCRATCH10); + dmub->debug.scratch[11] = REG_READ(DMCUB_SCRATCH11); + dmub->debug.scratch[12] = REG_READ(DMCUB_SCRATCH12); + dmub->debug.scratch[13] = REG_READ(DMCUB_SCRATCH13); + dmub->debug.scratch[14] = REG_READ(DMCUB_SCRATCH14); + dmub->debug.scratch[15] = REG_READ(DMCUB_SCRATCH15); + dmub->debug.scratch[16] = REG_READ(DMCUB_SCRATCH16); + + dmub->debug.undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); + dmub->debug.inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); + dmub->debug.data_write_fault_addr = REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); + + dmub->debug.inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); + dmub->debug.inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); + dmub->debug.inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); + + dmub->debug.inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); + dmub->debug.inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); + dmub->debug.inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); + + dmub->debug.outbox1_rptr = REG_READ(DMCUB_OUTBOX1_RPTR); + dmub->debug.outbox1_wptr = REG_READ(DMCUB_OUTBOX1_WPTR); + dmub->debug.outbox1_size = REG_READ(DMCUB_OUTBOX1_SIZE); REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); - diag_data->is_dmcub_enabled = is_dmub_enabled; + dmub->debug.is_dmcub_enabled = is_dmub_enabled; REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); - diag_data->is_dmcub_soft_reset = is_soft_reset; + dmub->debug.is_dmcub_soft_reset = is_soft_reset; REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset); - diag_data->is_dmcub_secure_reset = is_sec_reset; + dmub->debug.is_dmcub_secure_reset = is_sec_reset; REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); - diag_data->is_traceport_en = is_traceport_enabled; + dmub->debug.is_traceport_en = is_traceport_enabled; REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled); - diag_data->is_cw0_enabled = is_cw0_enabled; + dmub->debug.is_cw0_enabled = is_cw0_enabled; REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled); - diag_data->is_cw6_enabled = is_cw6_enabled; + dmub->debug.is_cw6_enabled = is_cw6_enabled; - diag_data->gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0); - diag_data->timeout_info = dmub->debug; + dmub->debug.gpint_datain0 = REG_READ(DMCUB_GPINT_DATAIN0); } void dmub_dcn401_configure_dmub_in_system_memory(struct dmub_srv *dmub) { diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h index 31f95b27e227..c35be52676f6 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h @@ -264,7 +264,7 @@ void dmub_dcn401_set_outbox0_rptr(struct dmub_srv *dmub, uint32_t rptr_offset); uint32_t dmub_dcn401_get_current_time(struct dmub_srv *dmub); -void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data); +void dmub_dcn401_get_diagnostic_data(struct dmub_srv *dmub); void dmub_dcn401_configure_dmub_in_system_memory(struct dmub_srv *dmub); void dmub_dcn401_send_inbox0_cmd(struct dmub_srv *dmub, union dmub_inbox0_data_register data); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 6133d25da301..0a2285540daf 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -1099,11 +1099,11 @@ bool dmub_srv_get_outbox0_msg(struct dmub_srv *dmub, struct dmcub_trace_buf_entr return dmub_rb_out_trace_buffer_front(&dmub->outbox0_rb, (void *)entry); } -bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) +bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub) { - if (!dmub || !dmub->hw_funcs.get_diagnostic_data || !diag_data) + if (!dmub || !dmub->hw_funcs.get_diagnostic_data) return false; - dmub->hw_funcs.get_diagnostic_data(dmub, diag_data); + dmub->hw_funcs.get_diagnostic_data(dmub); return true; } From 0747acf3311229e22009bec4a9e7fc30c879e842 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 22 Feb 2025 23:37:32 -0600 Subject: [PATCH 07/68] drm/amd/display: fix default brightness [Why] To avoid flickering during boot default brightness level set by BIOS should be maintained for as much of the boot as feasible. commit 2fe87f54abdc ("drm/amd/display: Set default brightness according to ACPI") attempted to set the right levels for AC vs DC, but brightness still got reset to maximum level in initialization code for setup_backlight_device(). [How] Remove the hardcoded initialization in setup_backlight_device() and instead program brightness value to match BIOS (AC or DC). This avoids a brightness flicker from kernel changing the value. Userspace may however still change it during boot. Fixes: 2fe87f54abdc ("drm/amd/display: Set default brightness according to ACPI") Acked-by: Wayne Lin Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6a54f1cfa125..5ae521d1b74b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4986,6 +4986,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) dm->backlight_dev[aconnector->bl_idx] = backlight_device_register(bl_name, aconnector->base.kdev, dm, &amdgpu_dm_backlight_ops, &props); + dm->brightness[aconnector->bl_idx] = props.brightness; if (IS_ERR(dm->backlight_dev[aconnector->bl_idx])) { DRM_ERROR("DM: Backlight registration failed!\n"); @@ -5053,7 +5054,6 @@ static void setup_backlight_device(struct amdgpu_display_manager *dm, aconnector->bl_idx = bl_idx; amdgpu_dm_update_backlight_caps(dm, bl_idx); - dm->brightness[bl_idx] = AMDGPU_MAX_BL_LEVEL; dm->backlight_link[bl_idx] = link; dm->num_of_edps++; From 5e19e2b57b6bb640d68dfc7991e1e182922cf867 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 23 Feb 2025 00:04:35 -0600 Subject: [PATCH 08/68] drm/amd/display: Restore correct backlight brightness after a GPU reset [Why] GPU reset will attempt to restore cached state, but brightness doesn't get restored. It will come back at 100% brightness, but userspace thinks it's the previous value. [How] When running resume sequence if GPU is in reset restore brightness to previous value. Acked-by: Wayne Lin Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5ae521d1b74b..bf69cd50712e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -250,6 +250,10 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector); static void handle_hpd_rx_irq(void *param); +static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, + int bl_idx, + u32 user_brightness); + static bool is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state, struct drm_crtc_state *new_crtc_state); @@ -3432,6 +3436,12 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) mutex_unlock(&dm->dc_lock); + /* set the backlight after a reset */ + for (i = 0; i < dm->num_of_edps; i++) { + if (dm->backlight_dev[i]) + amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); + } + return 0; } From 50e0bae34fa6b8b18e13473ddf0bcdab6ab68310 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 13 Feb 2025 16:26:22 -0600 Subject: [PATCH 09/68] drm/amd/display: Add and use new dm_prepare_suspend() callback [Why] The displays currently don't get turned off until after other IP blocks have been suspended. However turning off the displays first gives a very visible response that the system is on it's way down. [How] Turn off displays in a prepare_suspend() callback instead when possible. This will help for suspend and hibernate sequences. The shutdown sequence however will not call prepare() so check whether the state has been already saved to decide what to do. Acked-by: Wayne Lin Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bf69cd50712e..4f1cf8afe25f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3141,6 +3141,21 @@ static void hpd_rx_irq_work_suspend(struct amdgpu_display_manager *dm) } } +static int dm_prepare_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + if (amdgpu_in_reset(adev)) + return 0; + + WARN_ON(adev->dm.cached_state); + adev->dm.cached_state = drm_atomic_helper_suspend(adev_to_drm(adev)); + if (IS_ERR(adev->dm.cached_state)) + return PTR_ERR(adev->dm.cached_state); + + return 0; +} + static int dm_suspend(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -3171,10 +3186,11 @@ static int dm_suspend(struct amdgpu_ip_block *ip_block) return 0; } - WARN_ON(adev->dm.cached_state); - adev->dm.cached_state = drm_atomic_helper_suspend(adev_to_drm(adev)); - if (IS_ERR(adev->dm.cached_state)) - return PTR_ERR(adev->dm.cached_state); + if (!adev->dm.cached_state) { + adev->dm.cached_state = drm_atomic_helper_suspend(adev_to_drm(adev)); + if (IS_ERR(adev->dm.cached_state)) + return PTR_ERR(adev->dm.cached_state); + } s3_handle_hdmi_cec(adev_to_drm(adev), true); @@ -3606,6 +3622,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = { .early_fini = amdgpu_dm_early_fini, .hw_init = dm_hw_init, .hw_fini = dm_hw_fini, + .prepare_suspend = dm_prepare_suspend, .suspend = dm_suspend, .resume = dm_resume, .is_idle = dm_is_idle, From bd00b29b5f236dce677089319176dee5872b5a7a Mon Sep 17 00:00:00 2001 From: Danny Wang Date: Thu, 13 Feb 2025 16:18:34 +0800 Subject: [PATCH 10/68] drm/amd/display: Do not enable replay when vtotal update is pending. [Why&How] Vtotal is not applied to HW when handling vsync interrupt. Make sure vtotal is aligned before enable replay. Reviewed-by: Anthony Koo Reviewed-by: Robin Chen Signed-off-by: Danny Wang Signed-off-by: Zhongwei Zhang Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 9 +++++++-- .../gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 15 +++++++++++++++ drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 1 + .../drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 7 ++----- .../drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 7 ++----- .../drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c | 8 ++------ .../drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c | 4 +--- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 3 +-- .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 10 +++------- .../gpu/drm/amd/display/dc/hwss/hw_sequencer.h | 6 ++++++ 10 files changed, 40 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e71ea21401f5..5a43e4901cc0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -453,6 +453,7 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, if (dc->caps.max_v_total != 0 && (adjust->v_total_max > dc->caps.max_v_total || adjust->v_total_min > dc->caps.max_v_total)) { + stream->adjust.timing_adjust_pending = false; if (adjust->allow_otg_v_count_halt) return set_long_vtotal(dc, stream, adjust); else @@ -466,7 +467,7 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc, dc->hwss.set_drr(&pipe, 1, *adjust); - + stream->adjust.timing_adjust_pending = false; return true; } } @@ -3165,8 +3166,12 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_active_fixed) stream->vrr_active_fixed = *update->vrr_active_fixed; - if (update->crtc_timing_adjust) + if (update->crtc_timing_adjust) { + if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min || + stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max) + stream->adjust.timing_adjust_pending = true; stream->adjust = *update->crtc_timing_adjust; + } if (update->dpms_off) stream->dpms_off = *update->dpms_off; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index e0277728268a..52ee2225e132 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -659,6 +659,21 @@ void set_p_state_switch_method( } } +void set_drr_and_clear_adjust_pending( + struct pipe_ctx *pipe_ctx, + struct dc_stream_state *stream, + struct drr_params *params) +{ + /* params can be null.*/ + if (pipe_ctx && pipe_ctx->stream_res.tg && + pipe_ctx->stream_res.tg->funcs->set_drr) + pipe_ctx->stream_res.tg->funcs->set_drr( + pipe_ctx->stream_res.tg, params); + + if (stream) + stream->adjust.timing_adjust_pending = false; +} + void get_fams2_visual_confirm_color( struct dc *dc, struct dc_state *context, diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 9f3dd8824ed5..d562ddeca512 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -1017,6 +1017,7 @@ struct dc_crtc_timing_adjust { uint32_t v_total_mid; uint32_t v_total_mid_frame_num; uint32_t allow_otg_v_count_halt; + uint8_t timing_adjust_pending; }; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 9c9947fc5d44..bfd734e15731 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1658,9 +1658,7 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( params.vertical_total_min = stream->adjust.v_total_min; params.vertical_total_max = stream->adjust.v_total_max; - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx, stream, ¶ms); // DRR should set trigger event to monitor surface update event if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) @@ -2109,8 +2107,7 @@ static void set_drr(struct pipe_ctx **pipe_ctx, struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; if ((tg != NULL) && tg->funcs) { - if (tg->funcs->set_drr) - tg->funcs->set_drr(tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx[i], pipe_ctx[i]->stream, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) if (tg->funcs->set_static_screen_control) tg->funcs->set_static_screen_control( diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 301ef36d3d05..912f96323ed6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1113,9 +1113,7 @@ static void dcn10_reset_back_end_for_pipe( pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg); pipe_ctx->stream_res.tg->funcs->enable_optc_clock(pipe_ctx->stream_res.tg, false); - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, NULL); + set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL); if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; } @@ -3218,8 +3216,7 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; if ((tg != NULL) && tg->funcs) { - if (tg->funcs->set_drr) - tg->funcs->set_drr(tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx[i], pipe_ctx[i]->stream, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) if (tg->funcs->set_static_screen_control) tg->funcs->set_static_screen_control( diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c index a5a3e0823e21..926c08e790c1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn20/dcn20_hwseq.c @@ -952,9 +952,7 @@ enum dc_status dcn20_enable_stream_timing( params.vertical_total_max = stream->adjust.v_total_max; params.vertical_total_mid = stream->adjust.v_total_mid; params.vertical_total_mid_frame_num = stream->adjust.v_total_mid_frame_num; - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx, stream, ¶ms); // DRR should set trigger event to monitor surface update event if (stream->adjust.v_total_min != 0 && stream->adjust.v_total_max != 0) @@ -2856,9 +2854,7 @@ void dcn20_reset_back_end_for_pipe( pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, NULL); + set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL); /* TODO - convert symclk_ref_cnts for otg to a bit map to solve * the case where the same symclk is shared across multiple otg * instances diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 288e9dd9205d..f38340aa3f15 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -543,9 +543,7 @@ static void dcn31_reset_back_end_for_pipe( if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) pipe_ctx->stream->link->phy_state.symclk_ref_cnts.otg = 0; - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, NULL); + set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL); /* DPMS may already disable or */ /* dpms_off status is incorrect due to fastboot diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index b907ad1acedd..922b8d71cf1a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1473,8 +1473,7 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, num_frames = 2 * (frame_rate % 60); } } - if (tg->funcs->set_drr) - tg->funcs->set_drr(tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx[i], pipe_ctx[i]->stream, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) if (tg->funcs->set_static_screen_control) tg->funcs->set_static_screen_control( diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 39668d8cc13a..8f5da0ded850 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -830,10 +830,7 @@ enum dc_status dcn401_enable_stream_timing( } hws->funcs.wait_for_blank_complete(pipe_ctx->stream_res.opp); - - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, ¶ms); + set_drr_and_clear_adjust_pending(pipe_ctx, stream, ¶ms); /* Event triggers and num frames initialized for DRR, but can be * later updated for PSR use. Note DRR trigger events are generated @@ -1820,9 +1817,8 @@ void dcn401_reset_back_end_for_pipe( pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); - if (pipe_ctx->stream_res.tg->funcs->set_drr) - pipe_ctx->stream_res.tg->funcs->set_drr( - pipe_ctx->stream_res.tg, NULL); + set_drr_and_clear_adjust_pending(pipe_ctx, pipe_ctx->stream, NULL); + /* TODO - convert symclk_ref_cnts for otg to a bit map to solve * the case where the same symclk is shared across multiple otg * instances diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 2b1a2a00648a..c8b5ed834579 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -46,6 +46,7 @@ struct dce_hwseq; struct link_resource; struct dc_dmub_cmd; struct pg_block_update; +struct drr_params; struct subvp_pipe_control_lock_fast_params { struct dc *dc; @@ -527,6 +528,11 @@ void set_p_state_switch_method( struct dc_state *context, struct pipe_ctx *pipe_ctx); +void set_drr_and_clear_adjust_pending( + struct pipe_ctx *pipe_ctx, + struct dc_stream_state *stream, + struct drr_params *params); + void hwss_execute_sequence(struct dc *dc, struct block_sequence block_sequence[], int num_steps); From 8a21da2842bb22b2b80e5902d0438030d729bfd3 Mon Sep 17 00:00:00 2001 From: Peichen Huang Date: Tue, 25 Feb 2025 14:52:30 +0800 Subject: [PATCH 11/68] drm/amd/display: not abort link train when bw is low [WHY] DP tunneling should not abort link train even bandwidth become too low after downgrade. Otherwise, it would fail compliance test. [HOW} Do link train with downgrade settings even bandwidth is not enough Reviewed-by: Cruise Hung Reviewed-by: Meenakshikumar Somasundaram Signed-off-by: Peichen Huang Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/link/protocols/link_dp_training.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c index 613298d21d03..ef358afdfb65 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c @@ -1789,13 +1789,10 @@ bool perform_link_training_with_retries( is_link_bw_min = ((cur_link_settings.link_rate <= LINK_RATE_LOW) && (cur_link_settings.lane_count <= LANE_COUNT_ONE)); - if (is_link_bw_low) { + if (is_link_bw_low) DC_LOG_WARNING( "%s: Link(%d) bandwidth too low after fallback req_bw(%d) > link_bw(%d)\n", __func__, link->link_index, req_bw, link_bw); - - return false; - } } msleep(delay_between_attempts); From 084e0735448ae4d01928b537c03c7042aef2ab81 Mon Sep 17 00:00:00 2001 From: George Shen Date: Thu, 13 Feb 2025 14:46:49 -0500 Subject: [PATCH 12/68] drm/amd/display: Implement PCON regulated autonomous mode handling [Why/How] DP spec has been updated recently to make regulated autonomous mode more well-defined. In case any PCON vendors choose to implement regulated autonomous mode in the future, pre-emptively add handling for the regulated autonomous mode based on current spec. Reviewed-by: Wenjing Liu Signed-off-by: George Shen Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 27 ++++++++- .../dc/link/protocols/link_dp_capability.c | 55 +++++++++++++++---- 2 files changed, 71 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 1f4f11adc491..77c87ad57220 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -432,7 +432,28 @@ union hdmi_encoded_link_bw { uint8_t BW_32Gbps:1; uint8_t BW_40Gbps:1; uint8_t BW_48Gbps:1; - uint8_t RESERVED:1; // Bit 7 + uint8_t FRL_LINK_TRAINING_FINISHED:1; // Bit 7 + } bits; + uint8_t raw; +}; + +union hdmi_tx_link_status { + struct { + uint8_t HDMI_TX_LINK_ACTIVE_STATUS:1; + uint8_t HDMI_TX_READY_STATUS:1; + uint8_t RESERVED:6; + } bits; + uint8_t raw; +}; + +union autonomous_mode_and_frl_link_status { + struct { + uint8_t FRL_LT_IN_PROGRESS_STATUS:1; + uint8_t FRL_LT_LINK_CONFIG_IN_PROGRESS:3; + uint8_t RESERVED:1; + uint8_t FALLBACK_POLICY:1; + uint8_t FALLBACK_POLICY_VALID:1; + uint8_t REGULATED_AUTONOMOUS_MODE_SUPPORTED:1; } bits; uint8_t raw; }; @@ -1166,6 +1187,7 @@ struct dc_dongle_caps { uint32_t dp_hdmi_max_bpc; uint32_t dp_hdmi_max_pixel_clk_in_khz; uint32_t dp_hdmi_frl_max_link_bw_in_kbps; + uint32_t dp_hdmi_regulated_autonomous_mode_support; struct dc_dongle_dfp_cap_ext dfp_cap_ext; }; @@ -1394,6 +1416,9 @@ struct dp_trace { #ifndef DP_LTTPR_ALPM_CAPABILITIES #define DP_LTTPR_ALPM_CAPABILITIES 0xF0009 #endif +#ifndef DP_REGULATED_AUTONOMOUS_MODE_SUPPORTED_AND_HDMI_LINK_TRAINING_STATUS +#define DP_REGULATED_AUTONOMOUS_MODE_SUPPORTED_AND_HDMI_LINK_TRAINING_STATUS 0x303C +#endif #ifndef DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE #define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50 #endif diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index a77410122636..21ee0d96c9d4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -265,6 +265,8 @@ static uint32_t intersect_frl_link_bw_support( supported_bw_in_kbps = 18000000; else if (hdmi_encoded_link_bw.bits.BW_9Gbps) supported_bw_in_kbps = 9000000; + else if (hdmi_encoded_link_bw.bits.FRL_LINK_TRAINING_FINISHED) + supported_bw_in_kbps = 0; /* This case should only get hit in regulated autonomous mode. */ return supported_bw_in_kbps; } @@ -1075,6 +1077,48 @@ static enum dc_status wake_up_aux_channel(struct dc_link *link) return DC_OK; } +static void read_and_intersect_post_frl_lt_status( + struct dc_link *link) +{ + union autonomous_mode_and_frl_link_status autonomous_mode_caps = {0}; + union hdmi_tx_link_status hdmi_tx_link_status = {0}; + union hdmi_encoded_link_bw hdmi_encoded_link_bw = {0}; + + /* Check if dongle supports regulated autonomous mode. */ + core_link_read_dpcd(link, DP_REGULATED_AUTONOMOUS_MODE_SUPPORTED_AND_HDMI_LINK_TRAINING_STATUS, + &autonomous_mode_caps.raw, sizeof(autonomous_mode_caps)); + + link->dpcd_caps.dongle_caps.dp_hdmi_regulated_autonomous_mode_support = + autonomous_mode_caps.bits.REGULATED_AUTONOMOUS_MODE_SUPPORTED; + + if (link->dpcd_caps.dongle_caps.dp_hdmi_regulated_autonomous_mode_support) { + DC_LOG_DC("%s: PCON supports regulated autonomous mode.\n", __func__); + + core_link_read_dpcd(link, DP_PCON_HDMI_TX_LINK_STATUS, + &hdmi_tx_link_status.raw, sizeof(hdmi_tx_link_status)); + } + + // Intersect reported max link bw support with the supported link rate post FRL link training + if (core_link_read_dpcd(link, DP_PCON_HDMI_POST_FRL_STATUS, + &hdmi_encoded_link_bw.raw, sizeof(hdmi_encoded_link_bw)) == DC_OK) { + + if (link->dpcd_caps.dongle_caps.dp_hdmi_regulated_autonomous_mode_support && + (!hdmi_tx_link_status.bits.HDMI_TX_READY_STATUS || + !hdmi_encoded_link_bw.bits.FRL_LINK_TRAINING_FINISHED)) { + DC_LOG_WARNING("%s: PCON TX link training has not finished.\n", __func__); + + /* Link training not finished, ignore values from this DPCD reg. */ + return; + } + + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = intersect_frl_link_bw_support( + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps, + hdmi_encoded_link_bw); + DC_LOG_DC("%s: pcon frl link bw = %u\n", __func__, + link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps); + } +} + static void get_active_converter_info( uint8_t data, struct dc_link *link) { @@ -1163,21 +1207,12 @@ static void get_active_converter_info( hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT); if (link->dc->caps.dp_hdmi21_pcon_support) { - union hdmi_encoded_link_bw hdmi_encoded_link_bw; link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = link_bw_kbps_from_raw_frl_link_rate_data( hdmi_color_caps.bits.MAX_ENCODED_LINK_BW_SUPPORT); - // Intersect reported max link bw support with the supported link rate post FRL link training - if (core_link_read_dpcd(link, DP_PCON_HDMI_POST_FRL_STATUS, - &hdmi_encoded_link_bw.raw, sizeof(hdmi_encoded_link_bw)) == DC_OK) { - link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps = intersect_frl_link_bw_support( - link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps, - hdmi_encoded_link_bw); - DC_LOG_DC("%s: pcon frl link bw = %u\n", __func__, - link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps); - } + read_and_intersect_post_frl_lt_status(link); if (link->dpcd_caps.dongle_caps.dp_hdmi_frl_max_link_bw_in_kbps > 0) link->dpcd_caps.dongle_caps.extendedCapValid = true; From 50bcdef7b616cc6471172fd8cee32a57d6fb39a6 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Wed, 26 Feb 2025 23:52:57 -0500 Subject: [PATCH 13/68] drm/amd/display: assume VBIOS supports DSC as default [Why & How] The clear_dsc_setting at boot logic was based on dcn version check. As such new ASIC lost this DSC clear up logic, change the assumption to BIOS support eDP DSC for new ASIC. Reviewed-by: Alvin Lee Signed-off-by: Charlene Liu Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index bfd734e15731..dbc6e533dcac 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1836,11 +1836,10 @@ static void clean_up_dsc_blocks(struct dc *dc) struct pg_cntl *pg_cntl = dc->res_pool->pg_cntl; int i; - if (dc->ctx->dce_version != DCN_VERSION_3_5 && - dc->ctx->dce_version != DCN_VERSION_3_6 && - dc->ctx->dce_version != DCN_VERSION_3_51) + if (!dc->caps.is_apu || + dc->ctx->dce_version < DCN_VERSION_3_15) return; - + /*VBIOS supports dsc starts from dcn315*/ for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) { struct dcn_dsc_state s = {0}; From 15d1c2e6bf60511ba068d7d735d051911c6c5b92 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 20 Dec 2024 17:01:29 -0500 Subject: [PATCH 14/68] drm/amd/display: Add Support for reg inbox0 for host->DMUB CMDs [WHY] DCN4+ supports a new register based mailbox for sending messages from host to DMCUB. This mailbox supports 64 byte commands, which makes it compatible with the same structure as the frame buffer based mailbox. [HOW] The intention for reg_inbox0 is to be slot in replacement for the frame buffer based mailbox (Inbox1). It supports all of the required features: - Supports all messages handled by FB Inbox1 - Supports multi command batching Reviewed-by: Joshua Aberback Signed-off-by: Dillon Varone Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 185 ++++++----- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 2 +- drivers/gpu/drm/amd/display/dc/dc_helper.c | 2 +- .../gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c | 3 +- .../gpu/drm/amd/display/dc/dce/dmub_replay.c | 19 +- .../display/dc/link/protocols/link_dp_dpia.c | 1 + drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 133 +++++--- .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 49 ++- .../drm/amd/display/dmub/src/dmub_dcn401.c | 121 ++++--- .../drm/amd/display/dmub/src/dmub_dcn401.h | 4 +- .../gpu/drm/amd/display/dmub/src/dmub_srv.c | 302 ++++++++++++------ 11 files changed, 547 insertions(+), 274 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 614e03bfd598..ca6da53f45ad 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -70,20 +70,28 @@ void dc_dmub_srv_destroy(struct dc_dmub_srv **dmub_srv) } } -void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv) +bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv) { - struct dmub_srv *dmub = dc_dmub_srv->dmub; - struct dc_context *dc_ctx = dc_dmub_srv->ctx; + struct dmub_srv *dmub; + struct dc_context *dc_ctx; enum dmub_status status; + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dc_ctx = dc_dmub_srv->ctx; + dmub = dc_dmub_srv->dmub; + do { - status = dmub_srv_wait_for_idle(dmub, 100000); + status = dmub_srv_wait_for_pending(dmub, 100000); } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); if (status != DMUB_STATUS_OK) { DC_ERROR("Error waiting for DMUB idle: status=%d\n", status); dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); } + + return status == DMUB_STATUS_OK; } void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dc_dmub_srv) @@ -126,7 +134,49 @@ void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dc_dmub_srv, } } -bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, +static bool dc_dmub_srv_reg_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list) +{ + struct dc_context *dc_ctx; + struct dmub_srv *dmub; + enum dmub_status status = DMUB_STATUS_OK; + int i; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dc_ctx = dc_dmub_srv->ctx; + dmub = dc_dmub_srv->dmub; + + for (i = 0 ; i < count; i++) { + /* confirm no messages pending */ + do { + status = dmub_srv_wait_for_idle(dmub, 100000); + } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); + + /* queue command */ + if (status == DMUB_STATUS_OK) + status = dmub_srv_reg_cmd_execute(dmub, &cmd_list[i]); + + /* check for errors */ + if (status != DMUB_STATUS_OK) { + break; + } + } + + if (status != DMUB_STATUS_OK) { + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } + return false; + } + + return true; +} + +static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list) { @@ -143,11 +193,16 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, for (i = 0 ; i < count; i++) { // Queue command - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + if (!cmd_list[i].cmd_common.header.multi_cmd_pending || + dmub_rb_num_free(&dmub->inbox1.rb) >= count - i) { + status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); + } else { + status = DMUB_STATUS_QUEUE_FULL; + } if (status == DMUB_STATUS_QUEUE_FULL) { /* Execute and wait for queue to become empty again. */ - status = dmub_srv_cmd_execute(dmub); + status = dmub_srv_fb_cmd_execute(dmub); if (status == DMUB_STATUS_POWER_STATE_D3) return false; @@ -156,7 +211,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); /* Requeue the command. */ - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); } if (status != DMUB_STATUS_OK) { @@ -168,7 +223,7 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, } } - status = dmub_srv_cmd_execute(dmub); + status = dmub_srv_fb_cmd_execute(dmub); if (status != DMUB_STATUS_OK) { if (status != DMUB_STATUS_POWER_STATE_D3) { DC_ERROR("Error starting DMUB execution: status=%d\n", status); @@ -180,6 +235,23 @@ bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, return true; } +bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, + unsigned int count, + union dmub_rb_cmd *cmd_list) +{ + bool res = false; + + if (dc_dmub_srv && dc_dmub_srv->dmub) { + if (dc_dmub_srv->dmub->inbox_type == DMUB_CMD_INTERFACE_REG) { + res = dc_dmub_srv_reg_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); + } else { + res = dc_dmub_srv_fb_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); + } + } + + return res; +} + bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, enum dm_dmub_wait_type wait_type, union dmub_rb_cmd *cmd_list) @@ -202,7 +274,8 @@ bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); if (!dmub->debug.timeout_info.timeout_occured) { dmub->debug.timeout_info.timeout_occured = true; - dmub->debug.timeout_info.timeout_cmd = *cmd_list; + if (cmd_list) + dmub->debug.timeout_info.timeout_cmd = *cmd_list; dmub->debug.timeout_info.timestamp = dm_get_timestamp(dc_dmub_srv->ctx); } dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); @@ -210,8 +283,9 @@ bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, } // Copy data back from ring buffer into command - if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); + if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY && cmd_list) { + dmub_srv_cmd_get_response(dc_dmub_srv->dmub, cmd_list); + } } return true; @@ -224,74 +298,10 @@ bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type) { - struct dc_context *dc_ctx; - struct dmub_srv *dmub; - enum dmub_status status; - int i; - - if (!dc_dmub_srv || !dc_dmub_srv->dmub) - return false; - - dc_ctx = dc_dmub_srv->ctx; - dmub = dc_dmub_srv->dmub; - - for (i = 0 ; i < count; i++) { - // Queue command - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); - - if (status == DMUB_STATUS_QUEUE_FULL) { - /* Execute and wait for queue to become empty again. */ - status = dmub_srv_cmd_execute(dmub); - if (status == DMUB_STATUS_POWER_STATE_D3) - return false; - - status = dmub_srv_wait_for_idle(dmub, 100000); - if (status != DMUB_STATUS_OK) - return false; - - /* Requeue the command. */ - status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); - } - - if (status != DMUB_STATUS_OK) { - if (status != DMUB_STATUS_POWER_STATE_D3) { - DC_ERROR("Error queueing DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - } - return false; - } - } - - status = dmub_srv_cmd_execute(dmub); - if (status != DMUB_STATUS_OK) { - if (status != DMUB_STATUS_POWER_STATE_D3) { - DC_ERROR("Error starting DMUB execution: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - } + if (!dc_dmub_srv_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list)) return false; - } - - // Wait for DMUB to process command - if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) { - if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { - do { - status = dmub_srv_wait_for_idle(dmub, 100000); - } while (status != DMUB_STATUS_OK); - } else - status = dmub_srv_wait_for_idle(dmub, 100000); - if (status != DMUB_STATUS_OK) { - DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - return false; - } - - // Copy data back from ring buffer into command - if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); - } - - return true; + return dc_dmub_srv_wait_for_idle(dc_dmub_srv, wait_type, cmd_list); } bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv) @@ -1243,7 +1253,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); - dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); + dc_dmub_srv_wait_for_idle(dc->ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); memset(&new_signals, 0, sizeof(new_signals)); @@ -1400,7 +1410,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); - dmub_srv_sync_inbox1(dc->ctx->dmub_srv->dmub); + dmub_srv_sync_inboxes(dc->ctx->dmub_srv->dmub); } } @@ -1654,7 +1664,8 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* fill in generic command header */ global_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - global_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + global_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); if (enable) { /* send global configuration parameters */ @@ -1673,11 +1684,13 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* configure command header */ stream_base_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; stream_base_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_base_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_base_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); stream_base_cmd->header.multi_cmd_pending = 1; stream_sub_state_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; stream_sub_state_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_sub_state_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_sub_state_cmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); stream_sub_state_cmd->header.multi_cmd_pending = 1; /* copy stream static base state */ memcpy(&stream_base_cmd->config, @@ -1723,7 +1736,8 @@ void dc_dmub_srv_fams2_drr_update(struct dc *dc, cmd.fams2_drr_update.dmub_optc_state_req.v_total_mid_frame_num = vtotal_mid_frame_num; cmd.fams2_drr_update.dmub_optc_state_req.program_manual_trigger = program_manual_trigger; - cmd.fams2_drr_update.header.payload_bytes = sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); + cmd.fams2_drr_update.header.payload_bytes = + sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } @@ -1759,7 +1773,8 @@ void dc_dmub_srv_fams2_passthrough_flip( /* build command header */ cmds[num_cmds].fams2_flip.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; cmds[num_cmds].fams2_flip.header.sub_type = DMUB_CMD__FAMS2_FLIP; - cmds[num_cmds].fams2_flip.header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2_flip); + cmds[num_cmds].fams2_flip.header.payload_bytes = + sizeof(struct dmub_rb_cmd_fams2_flip) - sizeof(struct dmub_cmd_header); /* for chaining multiple commands, all but last command should set to 1 */ cmds[num_cmds].fams2_flip.header.multi_cmd_pending = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index a636f4c3f01d..ada5c2fb2db3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -58,7 +58,7 @@ struct dc_dmub_srv { bool needs_idle_wake; }; -void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); +bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv); bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 8f077e15b4f0..72b87b78da0e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -682,7 +682,7 @@ void reg_sequence_wait_done(const struct dc_context *ctx) if (offload && ctx->dc->debug.dmub_offload_enabled && !ctx->dc->debug.dmcub_emulation) { - dc_dmub_srv_wait_idle(ctx->dmub_srv); + dc_dmub_srv_wait_for_idle(ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); } } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c index 0d7e7f3b81a1..a641ae04450c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c @@ -240,7 +240,8 @@ bool dmub_abm_save_restore( cmd.abm_save_restore.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1; cmd.abm_save_restore.abm_init_config_data.panel_mask = panel_mask; - cmd.abm_save_restore.header.payload_bytes = sizeof(struct dmub_rb_cmd_abm_save_restore); + cmd.abm_save_restore.header.payload_bytes = + sizeof(struct dmub_rb_cmd_abm_save_restore) - sizeof(struct dmub_cmd_header); dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index c31e4f26a305..fcd3d86ad517 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -280,7 +280,9 @@ static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dm memset(&cmd, 0, sizeof(cmd)); pCmd->header.type = DMUB_CMD__REPLAY; pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL; - pCmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal); + pCmd->header.payload_bytes = + sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal) - + sizeof(struct dmub_cmd_header); pCmd->replay_set_power_opt_data.power_opt = power_opt; pCmd->replay_set_power_opt_data.panel_inst = panel_inst; pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); @@ -319,7 +321,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_timing_sync.header.sub_type = DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED; cmd.replay_set_timing_sync.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_timing_sync); + sizeof(struct dmub_rb_cmd_replay_set_timing_sync) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst = cmd_element->sync_data.panel_inst; @@ -331,7 +334,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_frameupdate_timer.header.sub_type = DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER; cmd.replay_set_frameupdate_timer.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer); + sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_frameupdate_timer.data.panel_inst = cmd_element->panel_inst; @@ -345,7 +349,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_pseudo_vtotal.header.sub_type = DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL; cmd.replay_set_pseudo_vtotal.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal); + sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_pseudo_vtotal.data.panel_inst = cmd_element->pseudo_vtotal_data.panel_inst; @@ -357,7 +362,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_disabled_adaptive_sync_sdp.header.sub_type = DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP; cmd.replay_disabled_adaptive_sync_sdp.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp); + sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_disabled_adaptive_sync_sdp.data.panel_inst = cmd_element->disabled_adaptive_sync_sdp_data.panel_inst; @@ -369,7 +375,8 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_general_cmd.header.sub_type = DMUB_CMD__REPLAY_SET_GENERAL_CMD; cmd.replay_set_general_cmd.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_general_cmd); + sizeof(struct dmub_rb_cmd_replay_set_general_cmd) - + sizeof(struct dmub_cmd_header); //Cmd Body cmd.replay_set_general_cmd.data.panel_inst = cmd_element->set_general_cmd_data.panel_inst; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c index 0d123e647652..c149210096ac 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c @@ -92,6 +92,7 @@ bool dpia_query_hpd_status(struct dc_link *link) /* prepare QUERY_HPD command */ cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE; + cmd.query_hpd.header.payload_bytes = sizeof(cmd.query_hpd.data); cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1; cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 8ebeb34021e5..203e3a440845 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -51,8 +51,8 @@ * for the cache windows. * * The call to dmub_srv_hw_init() programs the DMCUB registers to prepare - * for command submission. Commands can be queued via dmub_srv_cmd_queue() - * and executed via dmub_srv_cmd_execute(). + * for command submission. Commands can be queued via dmub_srv_fb_cmd_queue() + * and executed via dmub_srv_fb_cmd_execute(). * * If the queue is full the dmub_srv_wait_for_idle() call can be used to * wait until the queue has been cleared. @@ -170,6 +170,13 @@ enum dmub_srv_power_state_type { DMUB_POWER_STATE_D3 = 8 }; +/* enum dmub_inbox_cmd_interface type - defines default interface for host->dmub commands */ +enum dmub_inbox_cmd_interface_type { + DMUB_CMD_INTERFACE_DEFAULT = 0, + DMUB_CMD_INTERFACE_FB = 1, + DMUB_CMD_INTERFACE_REG = 2, +}; + /** * struct dmub_region - dmub hw memory region * @base: base address for region, must be 256 byte aligned @@ -349,6 +356,21 @@ struct dmub_diagnostic_data { uint8_t is_cw6_enabled : 1; }; +struct dmub_srv_inbox { + /* generic status */ + uint64_t num_submitted; + uint64_t num_reported; + union { + /* frame buffer mailbox status */ + struct dmub_rb rb; + /* register mailbox status */ + struct { + bool is_pending; + bool is_multi_pending; + }; + }; +}; + /** * struct dmub_srv_base_funcs - Driver specific base callbacks */ @@ -462,18 +484,21 @@ struct dmub_srv_hw_funcs { void (*init_reg_offsets)(struct dmub_srv *dmub, struct dc_context *ctx); void (*subvp_save_surf_addr)(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); + void (*send_reg_inbox0_cmd_msg)(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); uint32_t (*read_reg_inbox0_rsp_int_status)(struct dmub_srv *dmub); void (*read_reg_inbox0_cmd_rsp)(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); void (*write_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); + void (*clear_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); + void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); + uint32_t (*read_reg_outbox0_rdy_int_status)(struct dmub_srv *dmub); void (*write_reg_outbox0_rdy_int_ack)(struct dmub_srv *dmub); void (*read_reg_outbox0_msg)(struct dmub_srv *dmub, uint32_t *msg); void (*write_reg_outbox0_rsp)(struct dmub_srv *dmub, uint32_t *rsp); uint32_t (*read_reg_outbox0_rsp_int_status)(struct dmub_srv *dmub); - void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); void (*enable_reg_outbox0_rdy_int)(struct dmub_srv *dmub, bool enable); }; @@ -493,6 +518,7 @@ struct dmub_srv_create_params { enum dmub_asic asic; uint32_t fw_version; bool is_virtual; + enum dmub_inbox_cmd_interface_type inbox_type; }; /** @@ -519,11 +545,11 @@ struct dmub_srv { struct dmub_srv_dcn32_regs *regs_dcn32; struct dmub_srv_dcn35_regs *regs_dcn35; const struct dmub_srv_dcn401_regs *regs_dcn401; - struct dmub_srv_base_funcs funcs; struct dmub_srv_hw_funcs hw_funcs; - struct dmub_rb inbox1_rb; + struct dmub_srv_inbox inbox1; uint32_t inbox1_last_wptr; + struct dmub_srv_inbox reg_inbox0; /** * outbox1_rb is accessed without locks (dal & dc) * and to be used only in dmub_srv_stat_get_notification() @@ -543,6 +569,7 @@ struct dmub_srv { struct dmub_fw_meta_info meta_info; struct dmub_feature_caps feature_caps; struct dmub_visual_confirm_color visual_confirm_color; + enum dmub_inbox_cmd_interface_type inbox_type; enum dmub_srv_power_state_type power_state; struct dmub_diagnostic_data debug; @@ -700,19 +727,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub); /** - * dmub_srv_sync_inbox1() - sync sw state with hw state - * @dmub: the dmub service - * - * Sync sw state with hw state when resume from S0i3 - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_INVALID - unspecified error - */ -enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub); - -/** - * dmub_srv_cmd_queue() - queues a command to the DMUB + * dmub_srv_fb_cmd_queue() - queues a command to the DMUB * @dmub: the dmub service * @cmd: the command to queue * @@ -724,11 +739,11 @@ enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub); * DMUB_STATUS_QUEUE_FULL - no remaining room in queue * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, +enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, const union dmub_rb_cmd *cmd); /** - * dmub_srv_cmd_execute() - Executes a queued sequence to the dmub + * dmub_srv_fb_cmd_execute() - Executes a queued sequence to the dmub * @dmub: the dmub service * * Begins execution of queued commands on the dmub. @@ -737,7 +752,7 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, * DMUB_STATUS_OK - success * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub); +enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub); /** * dmub_srv_wait_for_hw_pwr_up() - Waits for firmware hardware power up is completed @@ -795,6 +810,23 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, enum dmub_status dmub_srv_wait_for_phy_init(struct dmub_srv *dmub, uint32_t timeout_us); +/** + * dmub_srv_wait_for_pending() - Re-entrant wait for messages currently pending + * @dmub: the dmub service + * @timeout_us: the maximum number of microseconds to wait + * + * Waits until the commands queued prior to this call are complete. + * If interfaces remain busy due to additional work being submitted + * concurrently, this function will not continue to wait. + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, + uint32_t timeout_us); + /** * dmub_srv_wait_for_idle() - Waits for the DMUB to be idle * @dmub: the dmub service @@ -893,9 +925,6 @@ enum dmub_status dmub_srv_get_fw_boot_status(struct dmub_srv *dmub, enum dmub_status dmub_srv_get_fw_boot_option(struct dmub_srv *dmub, union dmub_fw_boot_options *option); -enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, - union dmub_rb_cmd *cmd); - enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, bool skip); @@ -959,26 +988,6 @@ enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub); */ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); -/** - * dmub_srv_send_reg_inbox0_cmd() - send a dmub command and wait for the command - * being processed by DMUB. - * @dmub: The dmub service - * @cmd: The dmub command being sent. If with_replay is true, the function will - * update cmd with replied data. - * @with_reply: true if DMUB reply needs to be copied back to cmd. false if the - * cmd doesn't need to be replied. - * @timeout_us: timeout in microseconds. - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_TIMEOUT - DMUB fails to process the command within the timeout - * interval. - */ -enum dmub_status dmub_srv_send_reg_inbox0_cmd( - struct dmub_srv *dmub, - union dmub_rb_cmd *cmd, - bool with_reply, uint32_t timeout_us); - /** * dmub_srv_set_power_state() - Track DC power state in dmub_srv * @dmub: The dmub service @@ -991,4 +1000,40 @@ enum dmub_status dmub_srv_send_reg_inbox0_cmd( */ void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state); +/** + * dmub_srv_reg_cmd_execute() - Executes provided command to the dmub + * @dmub: the dmub service + * @cmd: the command packet to be executed + * + * Executes a single command for the dmub. + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); + + +/** + * dmub_srv_cmd_get_response() - Copies return data for command into buffer + * @dmub: the dmub service + * @cmd_rsp: response buffer + * + * Copies return data for command into buffer + */ +void dmub_srv_cmd_get_response(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd_rsp); + +/** + * dmub_srv_sync_inboxes() - Sync inbox state + * @dmub: the dmub service + * + * Sync inbox state + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub); + #endif /* _DMUB_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index f84bbc033e64..1f5f4e3e49d4 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -1331,6 +1331,16 @@ enum dmub_inbox0_command { */ #define DMUB_RB_SIZE (DMUB_RB_CMD_SIZE * DMUB_RB_MAX_ENTRY) +/** + * Maximum number of items in the DMUB REG INBOX0 internal ringbuffer. + */ +#define DMUB_REG_INBOX0_RB_MAX_ENTRY 16 + +/** + * Ringbuffer size in bytes. + */ +#define DMUB_REG_INBOX0_RB_SIZE (DMUB_RB_CMD_SIZE * DMUB_REG_INBOX0_RB_MAX_ENTRY) + /** * REG_SET mask for reg offload. */ @@ -1533,7 +1543,8 @@ struct dmub_cmd_header { unsigned int sub_type : 8; /**< command sub type */ unsigned int ret_status : 1; /**< 1 if returned data, 0 otherwise */ unsigned int multi_cmd_pending : 1; /**< 1 if multiple commands chained together */ - unsigned int reserved0 : 6; /**< reserved bits */ + unsigned int is_reg_based : 1; /**< 1 if register based mailbox cmd, 0 if FB based cmd */ + unsigned int reserved0 : 5; /**< reserved bits */ unsigned int payload_bytes : 6; /* payload excluding header - up to 60 bytes */ unsigned int reserved1 : 2; /**< reserved bits */ }; @@ -5890,6 +5901,42 @@ static inline bool dmub_rb_empty(struct dmub_rb *rb) return (rb->wrpt == rb->rptr); } +/** + * @brief gets number of outstanding requests in the RB + * + * @param rb DMUB Ringbuffer + * @return true if full + */ +static inline uint32_t dmub_rb_num_outstanding(struct dmub_rb *rb) +{ + uint32_t data_count; + + if (rb->wrpt >= rb->rptr) + data_count = rb->wrpt - rb->rptr; + else + data_count = rb->capacity - (rb->rptr - rb->wrpt); + + return data_count / DMUB_RB_CMD_SIZE; +} + +/** + * @brief gets number of free buffers in the RB + * + * @param rb DMUB Ringbuffer + * @return true if full + */ +static inline uint32_t dmub_rb_num_free(struct dmub_rb *rb) +{ + uint32_t data_count; + + if (rb->wrpt >= rb->rptr) + data_count = rb->wrpt - rb->rptr; + else + data_count = rb->capacity - (rb->rptr - rb->wrpt); + + return (rb->capacity - data_count) / DMUB_RB_CMD_SIZE; +} + /** * @brief Checks if the ringbuffer is full * diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index e67f7c4784eb..731ca9b6a6cf 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -517,28 +517,69 @@ void dmub_dcn401_send_reg_inbox0_cmd_msg(struct dmub_srv *dmub, union dmub_rb_cmd *cmd) { uint32_t *dwords = (uint32_t *)cmd; - + int32_t payload_size_bytes = cmd->cmd_common.header.payload_bytes; + uint32_t msg_index; static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); - REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[0]); - REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[1]); - REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[2]); - REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[3]); - REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[4]); - REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[5]); - REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[6]); - REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[7]); - REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[8]); - REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[9]); - REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[10]); - REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[11]); - REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[12]); - REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[13]); - REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[14]); + /* read remaining data based on payload size */ + for (msg_index = 0; msg_index < 15; msg_index++) { + if (payload_size_bytes <= msg_index * 4) { + break; + } + + switch (msg_index) { + case 0: + REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[msg_index + 1]); + break; + case 1: + REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[msg_index + 1]); + break; + case 2: + REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[msg_index + 1]); + break; + case 3: + REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[msg_index + 1]); + break; + case 4: + REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[msg_index + 1]); + break; + case 5: + REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[msg_index + 1]); + break; + case 6: + REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[msg_index + 1]); + break; + case 7: + REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[msg_index + 1]); + break; + case 8: + REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[msg_index + 1]); + break; + case 9: + REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[msg_index + 1]); + break; + case 10: + REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[msg_index + 1]); + break; + case 11: + REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[msg_index + 1]); + break; + case 12: + REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[msg_index + 1]); + break; + case 13: + REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[msg_index + 1]); + break; + case 14: + REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[msg_index + 1]); + break; + } + } + /* writing to INBOX RDY register will trigger DMUB REG INBOX0 RDY * interrupt. */ - REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[15]); + REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[0]); } uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub) @@ -556,30 +597,39 @@ void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); - dwords[0] = REG_READ(DMCUB_REG_INBOX0_MSG0); - dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG1); - dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG2); - dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG3); - dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG4); - dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG5); - dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG6); - dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG7); - dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG8); - dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG9); - dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG10); - dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG11); - dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG12); - dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG13); - dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG14); - dwords[15] = REG_READ(DMCUB_REG_INBOX0_RSP); + dwords[0] = REG_READ(DMCUB_REG_INBOX0_RSP); + dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG0); + dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG1); + dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG2); + dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG3); + dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG4); + dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG5); + dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG6); + dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG7); + dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG8); + dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG9); + dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG10); + dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG11); + dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG12); + dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG13); + dwords[15] = REG_READ(DMCUB_REG_INBOX0_MSG14); } void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 1); +} + +void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) +{ REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 0); } +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) +{ + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); +} + void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK, 1); @@ -604,11 +654,6 @@ uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub) return status; } -void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) -{ - REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); -} - void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_EN, enable ? 1:0); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h index c35be52676f6..88c3a44d67d9 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h @@ -277,11 +277,13 @@ uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub); void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); +void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); + void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub); void dmub_dcn401_read_reg_outbox0_msg(struct dmub_srv *dmub, uint32_t *msg); void dmub_dcn401_write_reg_outbox0_rsp(struct dmub_srv *dmub, uint32_t *msg); uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub); -void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable); uint32_t dmub_dcn401_read_reg_outbox0_rdy_int_status(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 0a2285540daf..713576a1f6fa 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -157,6 +157,9 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) { struct dmub_srv_hw_funcs *funcs = &dmub->hw_funcs; + /* default to specifying now inbox type */ + enum dmub_inbox_cmd_interface_type default_inbox_type = DMUB_CMD_INTERFACE_DEFAULT; + switch (asic) { case DMUB_ASIC_DCN20: case DMUB_ASIC_DCN21: @@ -353,7 +356,7 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) funcs->init_reg_offsets = dmub_srv_dcn35_regs_init; if (asic == DMUB_ASIC_DCN351) - funcs->init_reg_offsets = dmub_srv_dcn351_regs_init; + funcs->init_reg_offsets = dmub_srv_dcn351_regs_init; if (asic == DMUB_ASIC_DCN36) funcs->init_reg_offsets = dmub_srv_dcn36_regs_init; @@ -395,10 +398,15 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) funcs->get_current_time = dmub_dcn401_get_current_time; funcs->get_diagnostic_data = dmub_dcn401_get_diagnostic_data; + funcs->send_reg_inbox0_cmd_msg = dmub_dcn401_send_reg_inbox0_cmd_msg; funcs->read_reg_inbox0_rsp_int_status = dmub_dcn401_read_reg_inbox0_rsp_int_status; funcs->read_reg_inbox0_cmd_rsp = dmub_dcn401_read_reg_inbox0_cmd_rsp; funcs->write_reg_inbox0_rsp_int_ack = dmub_dcn401_write_reg_inbox0_rsp_int_ack; + funcs->clear_reg_inbox0_rsp_int_ack = dmub_dcn401_clear_reg_inbox0_rsp_int_ack; + funcs->enable_reg_inbox0_rsp_int = dmub_dcn401_enable_reg_inbox0_rsp_int; + default_inbox_type = DMUB_CMD_INTERFACE_FB; // still default to FB for now + funcs->write_reg_outbox0_rdy_int_ack = dmub_dcn401_write_reg_outbox0_rdy_int_ack; funcs->read_reg_outbox0_msg = dmub_dcn401_read_reg_outbox0_msg; funcs->write_reg_outbox0_rsp = dmub_dcn401_write_reg_outbox0_rsp; @@ -411,6 +419,20 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) return false; } + /* set default inbox type if not overriden */ + if (dmub->inbox_type == DMUB_CMD_INTERFACE_DEFAULT) { + if (default_inbox_type != DMUB_CMD_INTERFACE_DEFAULT) { + /* use default inbox type as specified by DCN rev */ + dmub->inbox_type = default_inbox_type; + } else if (funcs->send_reg_inbox0_cmd_msg) { + /* prefer reg as default inbox type if present */ + dmub->inbox_type = DMUB_CMD_INTERFACE_REG; + } else { + /* use fb as fallback */ + dmub->inbox_type = DMUB_CMD_INTERFACE_FB; + } + } + return true; } @@ -426,6 +448,7 @@ enum dmub_status dmub_srv_create(struct dmub_srv *dmub, dmub->asic = params->asic; dmub->fw_version = params->fw_version; dmub->is_virtual = params->is_virtual; + dmub->inbox_type = params->inbox_type; /* Setup asic dependent hardware funcs. */ if (!dmub_srv_hw_setup(dmub, params->asic)) { @@ -695,7 +718,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, inbox1.base = cw4.region.base; inbox1.top = cw4.region.base + DMUB_RB_SIZE; outbox1.base = inbox1.top; - outbox1.top = cw4.region.top; + outbox1.top = inbox1.top + DMUB_RB_SIZE; cw5.offset.quad_part = tracebuff_fb->gpu_addr; cw5.region.base = DMUB_CW5_BASE; @@ -737,7 +760,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, rb_params.ctx = dmub; rb_params.base_address = mail_fb->cpu_addr; rb_params.capacity = DMUB_RB_SIZE; - dmub_rb_init(&dmub->inbox1_rb, &rb_params); + dmub_rb_init(&dmub->inbox1.rb, &rb_params); // Initialize outbox1 ring buffer rb_params.ctx = dmub; @@ -768,27 +791,6 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) -{ - if (!dmub->sw_init) - return DMUB_STATUS_INVALID; - - if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { - uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); - - if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) { - return DMUB_STATUS_HW_FAILURE; - } else { - dmub->inbox1_rb.rptr = rptr; - dmub->inbox1_rb.wrpt = wptr; - dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; - } - } - - return DMUB_STATUS_OK; -} - enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) { if (!dmub->sw_init) @@ -799,8 +801,13 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) /* mailboxes have been reset in hw, so reset the sw state as well */ dmub->inbox1_last_wptr = 0; - dmub->inbox1_rb.wrpt = 0; - dmub->inbox1_rb.rptr = 0; + dmub->inbox1.rb.wrpt = 0; + dmub->inbox1.rb.rptr = 0; + dmub->inbox1.num_reported = 0; + dmub->inbox1.num_submitted = 0; + dmub->reg_inbox0.num_reported = 0; + dmub->reg_inbox0.num_submitted = 0; + dmub->reg_inbox0.is_pending = 0; dmub->outbox0_rb.wrpt = 0; dmub->outbox0_rb.rptr = 0; dmub->outbox1_rb.wrpt = 0; @@ -811,7 +818,7 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, +enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, const union dmub_rb_cmd *cmd) { if (!dmub->hw_init) @@ -820,18 +827,20 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, if (dmub->power_state != DMUB_POWER_STATE_D0) return DMUB_STATUS_POWER_STATE_D3; - if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity || - dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) { + if (dmub->inbox1.rb.rptr > dmub->inbox1.rb.capacity || + dmub->inbox1.rb.wrpt > dmub->inbox1.rb.capacity) { return DMUB_STATUS_HW_FAILURE; } - if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) + if (dmub_rb_push_front(&dmub->inbox1.rb, cmd)) { + dmub->inbox1.num_submitted++; return DMUB_STATUS_OK; + } return DMUB_STATUS_QUEUE_FULL; } -enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) +enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub) { struct dmub_rb flush_rb; @@ -846,13 +855,13 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) * been flushed to framebuffer memory. Otherwise DMCUB might * read back stale, fully invalid or partially invalid data. */ - flush_rb = dmub->inbox1_rb; + flush_rb = dmub->inbox1.rb; flush_rb.rptr = dmub->inbox1_last_wptr; dmub_rb_flush_pending(&flush_rb); - dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); + dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1.rb.wrpt); - dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; + dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; return DMUB_STATUS_OK; } @@ -910,26 +919,97 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, return DMUB_STATUS_TIMEOUT; } +static void dmub_srv_update_reg_inbox0_status(struct dmub_srv *dmub) +{ + if (dmub->reg_inbox0.is_pending) { + dmub->reg_inbox0.is_pending = dmub->hw_funcs.read_reg_inbox0_rsp_int_status && + !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); + + if (!dmub->reg_inbox0.is_pending) { + /* ack the rsp interrupt */ + if (dmub->hw_funcs.write_reg_inbox0_rsp_int_ack) + dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); + + /* only update the reported count if commands aren't being batched */ + if (!dmub->reg_inbox0.is_pending && !dmub->reg_inbox0.is_multi_pending) { + dmub->reg_inbox0.num_reported = dmub->reg_inbox0.num_submitted; + } + } + } +} + +enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, + uint32_t timeout_us) +{ + uint32_t i; + const uint32_t polling_interval_us = 1; + struct dmub_srv_inbox scratch_reg_inbox0 = dmub->reg_inbox0; + struct dmub_srv_inbox scratch_inbox1 = dmub->inbox1; + const volatile struct dmub_srv_inbox *reg_inbox0 = &dmub->reg_inbox0; + const volatile struct dmub_srv_inbox *inbox1 = &dmub->inbox1; + + if (!dmub->hw_init || + !dmub->hw_funcs.get_inbox1_wptr) + return DMUB_STATUS_INVALID; + + /* take a snapshot of the required mailbox state */ + scratch_inbox1.rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub); + + for (i = 0; i <= timeout_us; i += polling_interval_us) { + scratch_inbox1.rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + + scratch_reg_inbox0.is_pending = scratch_reg_inbox0.is_pending && + dmub->hw_funcs.read_reg_inbox0_rsp_int_status && + !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); + + if (scratch_inbox1.rb.rptr > dmub->inbox1.rb.capacity) + return DMUB_STATUS_HW_FAILURE; + + /* check current HW state first, but use command submission vs reported as a fallback */ + if ((dmub_rb_empty(&scratch_inbox1.rb) || + inbox1->num_reported >= scratch_inbox1.num_submitted) && + (!scratch_reg_inbox0.is_pending || + reg_inbox0->num_reported >= scratch_reg_inbox0.num_submitted)) + return DMUB_STATUS_OK; + + udelay(polling_interval_us); + } + + return DMUB_STATUS_TIMEOUT; +} + enum dmub_status dmub_srv_wait_for_idle(struct dmub_srv *dmub, uint32_t timeout_us) { uint32_t i, rptr; + const uint32_t polling_interval_us = 1; if (!dmub->hw_init) return DMUB_STATUS_INVALID; - for (i = 0; i <= timeout_us; ++i) { - rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + for (i = 0; i < timeout_us; i += polling_interval_us) { + /* update inbox1 state */ + rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - if (rptr > dmub->inbox1_rb.capacity) + if (rptr > dmub->inbox1.rb.capacity) return DMUB_STATUS_HW_FAILURE; - dmub->inbox1_rb.rptr = rptr; + if (dmub->inbox1.rb.rptr > rptr) { + /* rb wrapped */ + dmub->inbox1.num_reported += (rptr + dmub->inbox1.rb.capacity - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; + } else { + dmub->inbox1.num_reported += (rptr - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; + } + dmub->inbox1.rb.rptr = rptr; + + /* update reg_inbox0 */ + dmub_srv_update_reg_inbox0_status(dmub); - if (dmub_rb_empty(&dmub->inbox1_rb)) + /* check for idle */ + if (dmub_rb_empty(&dmub->inbox1.rb) && !dmub->reg_inbox0.is_pending) return DMUB_STATUS_OK; - udelay(1); + udelay(polling_interval_us); } return DMUB_STATUS_TIMEOUT; @@ -1040,35 +1120,6 @@ enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, - union dmub_rb_cmd *cmd) -{ - enum dmub_status status = DMUB_STATUS_OK; - - // Queue command - status = dmub_srv_cmd_queue(dmub, cmd); - - if (status != DMUB_STATUS_OK) - return status; - - // Execute command - status = dmub_srv_cmd_execute(dmub); - - if (status != DMUB_STATUS_OK) - return status; - - // Wait for DMUB to process command - status = dmub_srv_wait_for_idle(dmub, 100000); - - if (status != DMUB_STATUS_OK) - return status; - - // Copy data back from ring buffer into command - dmub_rb_get_return_data(&dmub->inbox1_rb, cmd); - - return status; -} - static inline bool dmub_rb_out_trace_buffer_front(struct dmub_rb *rb, void *entry) { @@ -1160,46 +1211,105 @@ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_ } } -enum dmub_status dmub_srv_send_reg_inbox0_cmd( - struct dmub_srv *dmub, - union dmub_rb_cmd *cmd, - bool with_reply, uint32_t timeout_us) +void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) { - uint32_t rsp_ready = 0; - uint32_t i; + if (!dmub || !dmub->hw_init) + return; + + dmub->power_state = dmub_srv_power_state; +} + +enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd) +{ + uint32_t num_pending = 0; + + if (!dmub->hw_init) + return DMUB_STATUS_INVALID; + + if (dmub->power_state != DMUB_POWER_STATE_D0) + return DMUB_STATUS_POWER_STATE_D3; + + if (!dmub->hw_funcs.send_reg_inbox0_cmd_msg || + !dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack) + return DMUB_STATUS_INVALID; + + if (dmub->reg_inbox0.num_submitted >= dmub->reg_inbox0.num_reported) + num_pending = dmub->reg_inbox0.num_submitted - dmub->reg_inbox0.num_reported; + else + /* num_submitted wrapped */ + num_pending = DMUB_REG_INBOX0_RB_MAX_ENTRY - + (dmub->reg_inbox0.num_reported - dmub->reg_inbox0.num_submitted); + if (num_pending >= DMUB_REG_INBOX0_RB_MAX_ENTRY) + return DMUB_STATUS_QUEUE_FULL; + + /* clear last rsp ack and send message */ + dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack(dmub); dmub->hw_funcs.send_reg_inbox0_cmd_msg(dmub, cmd); - for (i = 0; i < timeout_us; i++) { - rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); - if (rsp_ready) - break; - udelay(1); + dmub->reg_inbox0.num_submitted++; + dmub->reg_inbox0.is_pending = true; + dmub->reg_inbox0.is_multi_pending = cmd->cmd_common.header.multi_cmd_pending; + + return DMUB_STATUS_OK; +} + +void dmub_srv_cmd_get_response(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd_rsp) +{ + if (dmub) { + if (dmub->inbox_type == DMUB_CMD_INTERFACE_REG && + dmub->hw_funcs.read_reg_inbox0_cmd_rsp) { + dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd_rsp); + } else { + dmub_rb_get_return_data(&dmub->inbox1.rb, cmd_rsp); + } } - if (rsp_ready == 0) - return DMUB_STATUS_TIMEOUT; +} - if (with_reply) - dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd); +static enum dmub_status dmub_srv_sync_reg_inbox0(struct dmub_srv *dmub) +{ + if (!dmub || !dmub->sw_init) + return DMUB_STATUS_INVALID; - dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); + dmub->reg_inbox0.is_pending = 0; + dmub->reg_inbox0.is_multi_pending = 0; - /* wait for rsp int status is cleared to initial state before exit */ - for (; i <= timeout_us; i++) { - rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); - if (rsp_ready == 0) - break; - udelay(1); + return DMUB_STATUS_OK; +} + +static enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) +{ + if (!dmub->sw_init) + return DMUB_STATUS_INVALID; + + if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { + uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); + + if (rptr > dmub->inbox1.rb.capacity || wptr > dmub->inbox1.rb.capacity) { + return DMUB_STATUS_HW_FAILURE; + } else { + dmub->inbox1.rb.rptr = rptr; + dmub->inbox1.rb.wrpt = wptr; + dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; + } } - ASSERT(rsp_ready == 0); return DMUB_STATUS_OK; } -void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) +enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub) { - if (!dmub || !dmub->hw_init) - return; + enum dmub_status status; - dmub->power_state = dmub_srv_power_state; + status = dmub_srv_sync_reg_inbox0(dmub); + if (status != DMUB_STATUS_OK) + return status; + + status = dmub_srv_sync_inbox1(dmub); + if (status != DMUB_STATUS_OK) + return status; + + return DMUB_STATUS_OK; } From 274a87eb389f58eddcbc5659ab0b180b37e92775 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 27 Feb 2025 16:36:25 -0700 Subject: [PATCH 15/68] drm/amd/display: Assign normalized_pix_clk when color depth = 14 [WHY & HOW] A warning message "WARNING: CPU: 4 PID: 459 at ... /dc_resource.c:3397 calculate_phy_pix_clks+0xef/0x100 [amdgpu]" occurs because the display_color_depth == COLOR_DEPTH_141414 is not handled. This is observed in Radeon RX 6600 XT. It is fixed by assigning pix_clk * (14 * 3) / 24 - same as the rests. Also fixes the indentation in get_norm_pix_clk. Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index ea404435c9b9..313a32248cd7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3623,10 +3623,13 @@ static int get_norm_pix_clk(const struct dc_crtc_timing *timing) break; case COLOR_DEPTH_121212: normalized_pix_clk = (pix_clk * 36) / 24; - break; + break; + case COLOR_DEPTH_141414: + normalized_pix_clk = (pix_clk * 42) / 24; + break; case COLOR_DEPTH_161616: normalized_pix_clk = (pix_clk * 48) / 24; - break; + break; default: ASSERT(0); break; From dd60bfd5349a7a4c9e1a8b14c935593342c6c6a2 Mon Sep 17 00:00:00 2001 From: Leo Zeng Date: Thu, 27 Feb 2025 15:09:04 -0500 Subject: [PATCH 16/68] drm/amd/display: Fix visual confirm color not updating [WHY] Sometimes visual confirm color is updated, but the background color is not changed. This causes visual confrim to show incorrect colors. [HOW] Update background color when visual confirm color changes. Reviewed-by: Dillon Varone Signed-off-by: Leo Zeng Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 52ee2225e132..55b32dfbfdd6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -817,7 +817,14 @@ void hwss_build_fast_sequence(struct dc *dc, block_sequence[*num_steps].func = DPP_SET_OUTPUT_TRANSFER_FUNC; (*num_steps)++; } - + if (dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE && + dc->hwss.update_visual_confirm_color) { + block_sequence[*num_steps].params.update_visual_confirm_params.dc = dc; + block_sequence[*num_steps].params.update_visual_confirm_params.pipe_ctx = current_mpc_pipe; + block_sequence[*num_steps].params.update_visual_confirm_params.mpcc_id = current_mpc_pipe->plane_res.hubp->inst; + block_sequence[*num_steps].func = MPC_UPDATE_VISUAL_CONFIRM; + (*num_steps)++; + } if (current_mpc_pipe->stream->update_flags.bits.out_csc) { block_sequence[*num_steps].params.power_on_mpc_mem_pwr_params.mpc = dc->res_pool->mpc; block_sequence[*num_steps].params.power_on_mpc_mem_pwr_params.mpcc_id = current_mpc_pipe->plane_res.hubp->inst; From d3069feecdb5542604d29b59acfd1fd213bad95b Mon Sep 17 00:00:00 2001 From: Zhikai Zhai Date: Thu, 27 Feb 2025 20:09:14 +0800 Subject: [PATCH 17/68] drm/amd/display: calculate the remain segments for all pipes [WHY] In some cases the remain de-tile buffer segments will be greater than zero if we don't add the non-top pipe to calculate, at this time the override de-tile buffer size will be valid and used. But it makes the de-tile buffer segments used finally for all of pipes exceed the maximum. [HOW] Add the non-top pipe to calculate the remain de-tile buffer segments. Don't set override size to use the average according to pipe count if the value exceed the maximum. Reviewed-by: Charlene Liu Signed-off-by: Zhikai Zhai Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/resource/dcn315/dcn315_resource.c | 42 +++++++++---------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index 14acef036b5a..6c2bb3f63be1 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -1698,7 +1698,7 @@ static int dcn315_populate_dml_pipes_from_context( pipes[pipe_cnt].dout.dsc_input_bpc = 0; DC_FP_START(); dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); - if (pixel_rate_crb && !pipe->top_pipe && !pipe->prev_odm_pipe) { + if (pixel_rate_crb) { int bpp = source_format_to_bpp(pipes[pipe_cnt].pipe.src.source_format); /* Ceil to crb segment size */ int approx_det_segs_required_for_pstate = dcn_get_approx_det_segs_required_for_pstate( @@ -1755,28 +1755,26 @@ static int dcn315_populate_dml_pipes_from_context( continue; } - if (!pipe->top_pipe && !pipe->prev_odm_pipe) { - bool split_required = pipe->stream->timing.pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc) - || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120); - - if (remaining_det_segs > MIN_RESERVED_DET_SEGS && crb_pipes != 0) - pipes[pipe_cnt].pipe.src.det_size_override += (remaining_det_segs - MIN_RESERVED_DET_SEGS) / crb_pipes + - (crb_idx < (remaining_det_segs - MIN_RESERVED_DET_SEGS) % crb_pipes ? 1 : 0); - if (pipes[pipe_cnt].pipe.src.det_size_override > 2 * DCN3_15_MAX_DET_SEGS) { - /* Clamp to 2 pipe split max det segments */ - remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override - 2 * (DCN3_15_MAX_DET_SEGS); - pipes[pipe_cnt].pipe.src.det_size_override = 2 * DCN3_15_MAX_DET_SEGS; - } - if (pipes[pipe_cnt].pipe.src.det_size_override > DCN3_15_MAX_DET_SEGS || split_required) { - /* If we are splitting we must have an even number of segments */ - remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override % 2; - pipes[pipe_cnt].pipe.src.det_size_override -= pipes[pipe_cnt].pipe.src.det_size_override % 2; - } - /* Convert segments into size for DML use */ - pipes[pipe_cnt].pipe.src.det_size_override *= DCN3_15_CRB_SEGMENT_SIZE_KB; - - crb_idx++; + bool split_required = pipe->stream->timing.pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc) + || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120); + + if (remaining_det_segs > MIN_RESERVED_DET_SEGS && crb_pipes != 0) + pipes[pipe_cnt].pipe.src.det_size_override += (remaining_det_segs - MIN_RESERVED_DET_SEGS) / crb_pipes + + (crb_idx < (remaining_det_segs - MIN_RESERVED_DET_SEGS) % crb_pipes ? 1 : 0); + if (pipes[pipe_cnt].pipe.src.det_size_override > 2 * DCN3_15_MAX_DET_SEGS) { + /* Clamp to 2 pipe split max det segments */ + remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override - 2 * (DCN3_15_MAX_DET_SEGS); + pipes[pipe_cnt].pipe.src.det_size_override = 2 * DCN3_15_MAX_DET_SEGS; + } + if (pipes[pipe_cnt].pipe.src.det_size_override > DCN3_15_MAX_DET_SEGS || split_required) { + /* If we are splitting we must have an even number of segments */ + remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override % 2; + pipes[pipe_cnt].pipe.src.det_size_override -= pipes[pipe_cnt].pipe.src.det_size_override % 2; } + /* Convert segments into size for DML use */ + pipes[pipe_cnt].pipe.src.det_size_override *= DCN3_15_CRB_SEGMENT_SIZE_KB; + + crb_idx++; pipe_cnt++; } } From 34935701b7ed1a1ef449310ba041f10964b23cf4 Mon Sep 17 00:00:00 2001 From: Zhongwei Zhang Date: Fri, 28 Feb 2025 10:35:23 +0800 Subject: [PATCH 18/68] drm/amd/display: Correct timing_adjust_pending flag setting. [Why&How] stream->adjust will be overwritten by update->crtc_timing_adjust. We should set update->crtc_timing_adjust->timing_adjust_pending and then overwrite stream->adjust. Reset update->crtc_timing_adjust->timing_adjust_pending after the assignment. Reviewed-by: Charlene Liu Signed-off-by: Zhongwei Zhang Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 5a43e4901cc0..28d1353f403d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3169,8 +3169,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->crtc_timing_adjust) { if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min || stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max) - stream->adjust.timing_adjust_pending = true; + update->crtc_timing_adjust->timing_adjust_pending = true; stream->adjust = *update->crtc_timing_adjust; + update->crtc_timing_adjust->timing_adjust_pending = false; } if (update->dpms_off) From 29c1c20496a7a9bafe2bc2f833d69aa52e0f2c2d Mon Sep 17 00:00:00 2001 From: Ryan Seto Date: Fri, 28 Feb 2025 14:24:57 -0500 Subject: [PATCH 19/68] drm/amd/display: Prevent VStartup Overflow [Why] For some VR headsets with large blanks, it's possible to overflow the OTG_VSTARTUP_PARAM:VSTARTUP_START register. This can lead to incorrect DML calculations and underflow downstream. [How] Min the calcualted max_vstartup_lines with the max value of the register. Reviewed-by: Dillon Varone Signed-off-by: Ryan Seto Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 4c33d99ca7e8..4c504cb0e1c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -15,6 +15,7 @@ //#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW //#define DML_GLOBAL_PREFETCH_CHECK #define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE +#define DML_MAX_VSTARTUP_START 1023 const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) { @@ -3737,6 +3738,7 @@ static unsigned int CalculateMaxVStartup( dml2_printf("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); dml2_printf("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); #endif + max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START); return max_vstartup_lines; } From 725a04ba5a95e89c89633d4322430cfbca7ce128 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 13:18:14 -0600 Subject: [PATCH 20/68] drm/amd/display: Fix slab-use-after-free on hdcp_work [Why] A slab-use-after-free is reported when HDCP is destroyed but the property_validate_dwork queue is still running. [How] Cancel the delayed work when destroying workqueue. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4006 Fixes: da3fd7ac0bcf ("drm/amd/display: Update CP property based on HW query") Cc: Alex Deucher Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 8238cfd276be..6a4b5f4d8a9d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -455,6 +455,7 @@ void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work) for (i = 0; i < hdcp_work->max_link; i++) { cancel_delayed_work_sync(&hdcp_work[i].callback_dwork); cancel_delayed_work_sync(&hdcp_work[i].watchdog_timer_dwork); + cancel_delayed_work_sync(&hdcp_work[i].property_validate_dwork); } sysfs_remove_bin_file(kobj, &hdcp_work[0].attr); From 6b675ab8efbf2bcee25be29e865455c56e246401 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 13:30:01 -0600 Subject: [PATCH 21/68] drm/amd/display: Add scoped mutexes for amdgpu_dm_dhcp [Why] Guards automatically release mutex when it goes out of scope making code easier to follow. [How] Replace all use of mutex_lock()/mutex_unlock() with guard(mutex). Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 37 +++++-------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 6a4b5f4d8a9d..6f8f21e123c8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -172,7 +172,7 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, struct mod_hdcp_display_adjustment display_adjust; unsigned int conn_index = aconnector->base.index; - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); hdcp_w->aconnector[conn_index] = aconnector; memset(&link_adjust, 0, sizeof(link_adjust)); @@ -209,7 +209,6 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, mod_hdcp_update_display(&hdcp_w->hdcp, conn_index, &link_adjust, &display_adjust, &hdcp_w->output); process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); } static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, @@ -220,7 +219,7 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, struct drm_connector_state *conn_state = aconnector->base.state; unsigned int conn_index = aconnector->base.index; - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); hdcp_w->aconnector[conn_index] = aconnector; /* the removal of display will invoke auth reset -> hdcp destroy and @@ -239,7 +238,6 @@ static void hdcp_remove_display(struct hdcp_workqueue *hdcp_work, mod_hdcp_remove_display(&hdcp_w->hdcp, aconnector->base.index, &hdcp_w->output); process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); } void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_index) @@ -247,7 +245,7 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde struct hdcp_workqueue *hdcp_w = &hdcp_work[link_index]; unsigned int conn_index; - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); mod_hdcp_reset_connection(&hdcp_w->hdcp, &hdcp_w->output); @@ -259,8 +257,6 @@ void hdcp_reset_display(struct hdcp_workqueue *hdcp_work, unsigned int link_inde } process_output(hdcp_w); - - mutex_unlock(&hdcp_w->mutex); } void hdcp_handle_cpirq(struct hdcp_workqueue *hdcp_work, unsigned int link_index) @@ -277,7 +273,7 @@ static void event_callback(struct work_struct *work) hdcp_work = container_of(to_delayed_work(work), struct hdcp_workqueue, callback_dwork); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); cancel_delayed_work(&hdcp_work->callback_dwork); @@ -285,8 +281,6 @@ static void event_callback(struct work_struct *work) &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } static void event_property_update(struct work_struct *work) @@ -323,7 +317,7 @@ static void event_property_update(struct work_struct *work) continue; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); if (conn_state->commit) { ret = wait_for_completion_interruptible_timeout(&conn_state->commit->hw_done, @@ -355,7 +349,6 @@ static void event_property_update(struct work_struct *work) drm_hdcp_update_content_protection(connector, DRM_MODE_CONTENT_PROTECTION_DESIRED); } - mutex_unlock(&hdcp_work->mutex); drm_modeset_unlock(&dev->mode_config.connection_mutex); } } @@ -368,7 +361,7 @@ static void event_property_validate(struct work_struct *work) struct amdgpu_dm_connector *aconnector; unsigned int conn_index; - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); for (conn_index = 0; conn_index < AMDGPU_DM_MAX_DISPLAY_INDEX; conn_index++) { @@ -408,8 +401,6 @@ static void event_property_validate(struct work_struct *work) schedule_work(&hdcp_work->property_update_work); } } - - mutex_unlock(&hdcp_work->mutex); } static void event_watchdog_timer(struct work_struct *work) @@ -420,7 +411,7 @@ static void event_watchdog_timer(struct work_struct *work) struct hdcp_workqueue, watchdog_timer_dwork); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); cancel_delayed_work(&hdcp_work->watchdog_timer_dwork); @@ -429,8 +420,6 @@ static void event_watchdog_timer(struct work_struct *work) &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } static void event_cpirq(struct work_struct *work) @@ -439,13 +428,11 @@ static void event_cpirq(struct work_struct *work) hdcp_work = container_of(work, struct hdcp_workqueue, cpirq_work); - mutex_lock(&hdcp_work->mutex); + guard(mutex)(&hdcp_work->mutex); mod_hdcp_process_event(&hdcp_work->hdcp, MOD_HDCP_EVENT_CPIRQ, &hdcp_work->output); process_output(hdcp_work); - - mutex_unlock(&hdcp_work->mutex); } void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work) @@ -479,7 +466,7 @@ static bool enable_assr(void *handle, struct dc_link *link) dtm_cmd = (struct ta_dtm_shared_memory *)psp->dtm_context.context.mem_context.shared_buf; - mutex_lock(&psp->dtm_context.mutex); + guard(mutex)(&psp->dtm_context.mutex); memset(dtm_cmd, 0, sizeof(struct ta_dtm_shared_memory)); dtm_cmd->cmd_id = TA_DTM_COMMAND__TOPOLOGY_ASSR_ENABLE; @@ -494,8 +481,6 @@ static bool enable_assr(void *handle, struct dc_link *link) res = false; } - mutex_unlock(&psp->dtm_context.mutex); - return res; } @@ -557,13 +542,11 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) (!!aconnector->base.state) ? aconnector->base.state->hdcp_content_type : -1); - mutex_lock(&hdcp_w->mutex); + guard(mutex)(&hdcp_w->mutex); mod_hdcp_add_display(&hdcp_w->hdcp, link, display, &hdcp_w->output); process_output(hdcp_w); - mutex_unlock(&hdcp_w->mutex); - } /** From 272385483e0b1cadfeae03548fb8798d178f13d9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 28 Feb 2025 13:32:57 -0600 Subject: [PATCH 22/68] drm/amd/display: Drop unnecessary ret variable for enable_assr() [Why] enable_assr() has a res variable that only is changed in one block with no cleanup necessary. [How] Remove variable and return early from failure cases. Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 6f8f21e123c8..a3e93b2891f0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -457,7 +457,6 @@ static bool enable_assr(void *handle, struct dc_link *link) struct mod_hdcp hdcp = hdcp_work->hdcp; struct psp_context *psp = hdcp.config.psp.handle; struct ta_dtm_shared_memory *dtm_cmd; - bool res = true; if (!psp->dtm_context.context.initialized) { DRM_INFO("Failed to enable ASSR, DTM TA is not initialized."); @@ -478,10 +477,10 @@ static bool enable_assr(void *handle, struct dc_link *link) if (dtm_cmd->dtm_status != TA_DTM_STATUS__SUCCESS) { DRM_INFO("Failed to enable ASSR"); - res = false; + return false; } - return res; + return true; } static void update_config(void *handle, struct cp_psp_stream_config *config) From 756e58e83e89d372b94269c0cde61fe55da76947 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Mon, 3 Mar 2025 13:53:16 -0500 Subject: [PATCH 23/68] drm/amd/display: remove minimum Dispclk and apply oem panel timing. [why & how] 1. apply oem panel timing (not only on OLED) 2. remove MIN_DPP_DISP_CLK request in driver. This fix will apply for dcn31x but not sync with DML's output. Reviewed-by: Ovidiu Bunea Signed-off-by: Charlene Liu Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 2 -- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c | 2 -- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 3 ++- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index a0fb4481d2f1..19a15acd1509 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -194,8 +194,6 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base, // workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow. if (new_clocks->dppclk_khz < MIN_DPP_DISP_CLK) new_clocks->dppclk_khz = MIN_DPP_DISP_CLK; - if (new_clocks->dispclk_khz < MIN_DPP_DISP_CLK) - new_clocks->dispclk_khz = MIN_DPP_DISP_CLK; if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index c3e50c3aaa60..4b19d9cf27ce 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -201,8 +201,6 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base, // workaround: Limit dppclk to 100Mhz to avoid lower eDP panel switch to plus 4K monitor underflow. if (new_clocks->dppclk_khz < 100000) new_clocks->dppclk_khz = 100000; - if (new_clocks->dispclk_khz < 100000) - new_clocks->dispclk_khz = 100000; if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index dbc6e533dcac..5656d10368ad 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1066,7 +1066,8 @@ void dce110_edp_backlight_control( DC_LOG_DC("edp_receiver_ready_T9 skipped\n"); } - if (!enable && link->dpcd_sink_ext_caps.bits.oled) { + if (!enable) { + /*follow oem panel config's requirement*/ pre_T11_delay += link->panel_config.pps.extra_pre_t11_ms; msleep(pre_T11_delay); } From 2f1b6b24b0ddc6e3342a6c5f731b79e57339f102 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Mon, 3 Mar 2025 11:10:19 -0500 Subject: [PATCH 24/68] drm/amd/display: Promote DAL to 3.2.324 This version brings along following fixes: - Fix some Replay/PSR issue - Fix backlight brightness - Fix suspend issue - Fix visual confirm color - Add scoped mutexes for amdgpu_dm_dhcp Reviewed-by: Ovidiu Bunea Reviewed-by: Nicholas Kazlauskas Signed-off-by: Taimur Hassan Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index a62c4893e5ff..67e1bb6fa335 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -53,7 +53,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.323" +#define DC_VER "3.2.324" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC From f4f086de310026abfe28512dfa4984fb4648663a Mon Sep 17 00:00:00 2001 From: Ethan Carter Edwards Date: Thu, 27 Feb 2025 18:16:21 -0500 Subject: [PATCH 25/68] drm/amd/display: change kzalloc to kcalloc in dcn30_validate_bandwidth() We are trying to get rid of all multiplications from allocation functions to prevent integer overflows. Here the multiplication is probably safe, but using kcalloc() is more appropriate and improves readability. This patch has no effect on runtime behavior. Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Ethan Carter Edwards Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index 13202ce30d66..f01ced015072 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -2047,7 +2047,8 @@ bool dcn30_validate_bandwidth(struct dc *dc, int vlevel = 0; int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count, + sizeof(display_e2e_pipe_params_st), GFP_KERNEL); DC_LOGGER_INIT(dc->ctx->logger); BW_VAL_TRACE_COUNT(); From 934cb529e90c57f5766668fe13624624dae1d790 Mon Sep 17 00:00:00 2001 From: Ethan Carter Edwards Date: Thu, 27 Feb 2025 18:16:22 -0500 Subject: [PATCH 26/68] drm/amd/display: change kzalloc to kcalloc in dcn31_validate_bandwidth() We are trying to get rid of all multiplications from allocation functions to prevent integer overflows. Here the multiplication is probably safe, but using kcalloc() is more appropriate and improves readability. This patch has no effect on runtime behavior. Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Ethan Carter Edwards Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 3c42ba8566cf..dddddbfef85f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1768,7 +1768,8 @@ bool dcn31_validate_bandwidth(struct dc *dc, int vlevel = 0; int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count, + sizeof(display_e2e_pipe_params_st), GFP_KERNEL); DC_LOGGER_INIT(dc->ctx->logger); BW_VAL_TRACE_COUNT(); From b17a94f2fe8225e47c8efe770c6c246488c76a20 Mon Sep 17 00:00:00 2001 From: Ethan Carter Edwards Date: Thu, 27 Feb 2025 18:16:23 -0500 Subject: [PATCH 27/68] drm/amd/display: change kzalloc to kcalloc in dcn314_validate_bandwidth() We are trying to get rid of all multiplications from allocation functions to prevent integer overflows. Here the multiplication is probably safe, but using kcalloc() is more appropriate and improves readability. This patch has no effect on runtime behavior. Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Ethan Carter Edwards Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index e3ba105034f8..26becc4cb804 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -1704,7 +1704,8 @@ bool dcn314_validate_bandwidth(struct dc *dc, int vlevel = 0; int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count, + sizeof(display_e2e_pipe_params_st), GFP_KERNEL); DC_LOGGER_INIT(dc->ctx->logger); BW_VAL_TRACE_COUNT(); From 315ce6c41aa9b913461a3e018ce7516435245787 Mon Sep 17 00:00:00 2001 From: Ethan Carter Edwards Date: Thu, 27 Feb 2025 18:16:24 -0500 Subject: [PATCH 28/68] drm/amd/display: change kzalloc to kcalloc in dml1_validate() We are trying to get rid of all multiplications from allocation functions to prevent integer overflows. Here the multiplication is probably safe, but using kcalloc() is more appropriate and improves readability. This patch has no effect on runtime behavior. Reviewed-by: Rodrigo Siqueira Reviewed-by: Alex Hung Signed-off-by: Ethan Carter Edwards Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 664302876019..2a59cc61ed8c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -1749,7 +1749,8 @@ static bool dml1_validate(struct dc *dc, struct dc_state *context, bool fast_val int vlevel = 0; int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + display_e2e_pipe_params_st *pipes = kcalloc(dc->res_pool->pipe_count, + sizeof(display_e2e_pipe_params_st), GFP_KERNEL); /* To handle Freesync properly, setting FreeSync DML parameters * to its default state for the first stage of validation From 02438acd252395628d74cfac692efbb676d21521 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 9 Mar 2025 12:26:50 -0400 Subject: [PATCH 29/68] drm/amdgpu/vce2: fix ip block reference Need to use the correct IP block type. VCE vs VCN. Fixes mclk issues on Hawaii. Suggested by selendym. Fixes: 82ae6619a450 ("drm/amdgpu: update the handle ptr in wait_for_idle") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3997 Reviewed-by: Boyuan Zhang Cc: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 8c8c02606d25..bee3e904a6bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -284,7 +284,7 @@ static int vce_v2_0_stop(struct amdgpu_device *adev) return 0; } - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCN); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE); if (!ip_block) return -EINVAL; From 099f273eff9c4927be47e337ecf9b10df88a99ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Mon, 24 Feb 2025 22:02:21 -0300 Subject: [PATCH 30/68] drm/amdgpu: Trigger a wedged event for ring reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of only triggering a wedged event for complete GPU resets, trigger for ring resets. Regardless of the reset, it's useful for userspace to know that it happened because the kernel will reject further submissions from that app. Reviewed-by: Christian König Signed-off-by: André Almeida Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 1d26be3c6d9d..935df2cdcc16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -166,6 +166,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) if (amdgpu_ring_sched_ready(ring)) drm_sched_start(&ring->sched, 0); dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name); + drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE); goto exit; } dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name); From 0ee560d71f9ab3af61ffc07eadc55c5a36f09843 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Mar 2025 13:47:02 +0300 Subject: [PATCH 31/68] drm/amdgpu/gfx: delete stray tabs These lines are indented one tab too far. Delete the extra tabs. Reviewed-by: Srinivasan Shanmugam Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a194bf3347cb..984e6ff6e463 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -2002,8 +2002,8 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) if (adev->kfd.init_complete) { WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); - amdgpu_amdkfd_start_sched(adev, idx); - adev->gfx.kfd_sch_inactive[idx] = false; + amdgpu_amdkfd_start_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = false; } } mutex_unlock(&adev->enforce_isolation_mutex); From 5b1fa87f305050d17c553381c39ad8f1e17ce062 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Mar 2025 13:47:25 +0300 Subject: [PATCH 32/68] drm/amdkfd: delete stray tab in kfd_dbg_set_mes_debug_mode() These lines are indented one tab more than they should be. Delete the stray tabs. Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 12456c61ffa5..ba99e0f258ae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -357,12 +357,12 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd, bool sq_trap_en) return 0; if (!pdd->proc_ctx_cpu_ptr) { - r = amdgpu_amdkfd_alloc_gtt_mem(adev, - AMDGPU_MES_PROC_CTX_SIZE, - &pdd->proc_ctx_bo, - &pdd->proc_ctx_gpu_addr, - &pdd->proc_ctx_cpu_ptr, - false); + r = amdgpu_amdkfd_alloc_gtt_mem(adev, + AMDGPU_MES_PROC_CTX_SIZE, + &pdd->proc_ctx_bo, + &pdd->proc_ctx_gpu_addr, + &pdd->proc_ctx_cpu_ptr, + false); if (r) { dev_err(adev->dev, "failed to allocate process context bo\n"); From 20fb56dfd8d20ae4406007d0601fa024d24a3301 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Sun, 9 Mar 2025 12:48:50 -0400 Subject: [PATCH 33/68] drm/amdgpu: prepare DCE6 uniformisation with DCE8 and DCE10 Let's begin the cleanup in sid.h to prevent warnings and errors when wiring sid.h into dce_v6_0.c. This is a bigger cleanup. Many defines found under sid.h have already been properly moved into the different "_d.h" and "_sh_mask.h", so they should have been already removed from sid.h and properly linked in where needed. Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/si.c | 68 +++--- drivers/gpu/drm/amd/amdgpu/sid.h | 336 +------------------------- 3 files changed, 43 insertions(+), 371 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index ac51b7a6e8d4..5fcb1a277f75 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -40,18 +40,24 @@ #include "amdgpu_connectors.h" #include "amdgpu_display.h" +#include "dce_v6_0.h" + #include "bif/bif_3_0_d.h" #include "bif/bif_3_0_sh_mask.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" + #include "gca/gfx_6_0_d.h" #include "gca/gfx_6_0_sh_mask.h" +#include "gca/gfx_7_2_enum.h" + #include "gmc/gmc_6_0_d.h" #include "gmc/gmc_6_0_sh_mask.h" + #include "dce/dce_6_0_d.h" #include "dce/dce_6_0_sh_mask.h" -#include "gca/gfx_7_2_enum.h" -#include "dce_v6_0.h" + #include "si_enums.h" static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index f90e07375396..2247f6a94858 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1124,41 +1124,41 @@ static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {mmCP_STALLED_STAT3}, {GB_ADDR_CONFIG}, {MC_ARB_RAMCFG}, - {GB_TILE_MODE0}, - {GB_TILE_MODE1}, - {GB_TILE_MODE2}, - {GB_TILE_MODE3}, - {GB_TILE_MODE4}, - {GB_TILE_MODE5}, - {GB_TILE_MODE6}, - {GB_TILE_MODE7}, - {GB_TILE_MODE8}, - {GB_TILE_MODE9}, - {GB_TILE_MODE10}, - {GB_TILE_MODE11}, - {GB_TILE_MODE12}, - {GB_TILE_MODE13}, - {GB_TILE_MODE14}, - {GB_TILE_MODE15}, - {GB_TILE_MODE16}, - {GB_TILE_MODE17}, - {GB_TILE_MODE18}, - {GB_TILE_MODE19}, - {GB_TILE_MODE20}, - {GB_TILE_MODE21}, - {GB_TILE_MODE22}, - {GB_TILE_MODE23}, - {GB_TILE_MODE24}, - {GB_TILE_MODE25}, - {GB_TILE_MODE26}, - {GB_TILE_MODE27}, - {GB_TILE_MODE28}, - {GB_TILE_MODE29}, - {GB_TILE_MODE30}, - {GB_TILE_MODE31}, + {mmGB_TILE_MODE0}, + {mmGB_TILE_MODE1}, + {mmGB_TILE_MODE2}, + {mmGB_TILE_MODE3}, + {mmGB_TILE_MODE4}, + {mmGB_TILE_MODE5}, + {mmGB_TILE_MODE6}, + {mmGB_TILE_MODE7}, + {mmGB_TILE_MODE8}, + {mmGB_TILE_MODE9}, + {mmGB_TILE_MODE10}, + {mmGB_TILE_MODE11}, + {mmGB_TILE_MODE12}, + {mmGB_TILE_MODE13}, + {mmGB_TILE_MODE14}, + {mmGB_TILE_MODE15}, + {mmGB_TILE_MODE16}, + {mmGB_TILE_MODE17}, + {mmGB_TILE_MODE18}, + {mmGB_TILE_MODE19}, + {mmGB_TILE_MODE20}, + {mmGB_TILE_MODE21}, + {mmGB_TILE_MODE22}, + {mmGB_TILE_MODE23}, + {mmGB_TILE_MODE24}, + {mmGB_TILE_MODE25}, + {mmGB_TILE_MODE26}, + {mmGB_TILE_MODE27}, + {mmGB_TILE_MODE28}, + {mmGB_TILE_MODE29}, + {mmGB_TILE_MODE30}, + {mmGB_TILE_MODE31}, {CC_RB_BACKEND_DISABLE, true}, - {GC_USER_RB_BACKEND_DISABLE, true}, - {PA_SC_RASTER_CONFIG, true}, + {mmGC_USER_RB_BACKEND_DISABLE, true}, + {mmPA_SC_RASTER_CONFIG, true}, }; static uint32_t si_get_register_value(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 9a39cbfe6db9..2218fd99ee83 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -696,18 +696,6 @@ #define HDP_REG_COHERENCY_FLUSH_CNTL 0x1528 /* DCE6 ELD audio interface */ -#define AZ_F0_CODEC_ENDPOINT_INDEX 0x1780 -# define AZ_ENDPOINT_REG_INDEX(x) (((x) & 0xff) << 0) -# define AZ_ENDPOINT_REG_WRITE_EN (1 << 8) -#define AZ_F0_CODEC_ENDPOINT_DATA 0x1781 - -#define AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER 0x25 -#define SPEAKER_ALLOCATION(x) (((x) & 0x7f) << 0) -#define SPEAKER_ALLOCATION_MASK (0x7f << 0) -#define SPEAKER_ALLOCATION_SHIFT 0 -#define HDMI_CONNECTION (1 << 16) -#define DP_CONNECTION (1 << 17) - #define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */ #define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */ #define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */ @@ -909,26 +897,11 @@ #define CRTC_STATUS_FRAME_COUNT 0x1BA6 /* Audio clocks */ -#define DCCG_AUDIO_DTO_SOURCE 0x05ac -# define DCCG_AUDIO_DTO0_SOURCE_SEL(x) ((x) << 0) /* crtc0 - crtc5 */ -# define DCCG_AUDIO_DTO_SEL (1 << 4) /* 0=dto0 1=dto1 */ - #define DCCG_AUDIO_DTO0_PHASE 0x05b0 #define DCCG_AUDIO_DTO0_MODULE 0x05b4 #define DCCG_AUDIO_DTO1_PHASE 0x05c0 #define DCCG_AUDIO_DTO1_MODULE 0x05c4 -#define AFMT_AUDIO_SRC_CONTROL 0x1c4f -#define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0) -/* AFMT_AUDIO_SRC_SELECT - * 0 = stream0 - * 1 = stream1 - * 2 = stream2 - * 3 = stream3 - * 4 = stream4 - * 5 = stream5 - */ - #define GRBM_CNTL 0x2000 #define GRBM_READ_TIMEOUT(x) ((x) << 0) @@ -977,30 +950,6 @@ #define SE_DB_BUSY (1 << 30) #define SE_CB_BUSY (1 << 31) -#define GRBM_SOFT_RESET 0x2008 -#define SOFT_RESET_CP (1 << 0) -#define SOFT_RESET_CB (1 << 1) -#define SOFT_RESET_RLC (1 << 2) -#define SOFT_RESET_DB (1 << 3) -#define SOFT_RESET_GDS (1 << 4) -#define SOFT_RESET_PA (1 << 5) -#define SOFT_RESET_SC (1 << 6) -#define SOFT_RESET_BCI (1 << 7) -#define SOFT_RESET_SPI (1 << 8) -#define SOFT_RESET_SX (1 << 10) -#define SOFT_RESET_TC (1 << 11) -#define SOFT_RESET_TA (1 << 12) -#define SOFT_RESET_VGT (1 << 14) -#define SOFT_RESET_IA (1 << 15) - -#define GRBM_GFX_INDEX 0x200B -#define INSTANCE_INDEX(x) ((x) << 0) -#define SH_INDEX(x) ((x) << 8) -#define SE_INDEX(x) ((x) << 16) -#define SH_BROADCAST_WRITES (1 << 29) -#define INSTANCE_BROADCAST_WRITES (1 << 30) -#define SE_BROADCAST_WRITES (1 << 31) - #define GRBM_INT_CNTL 0x2018 # define RDERR_INT_ENABLE (1 << 0) # define GUI_IDLE_INT_ENABLE (1 << 19) @@ -1045,16 +994,6 @@ #define VGT_VTX_VECT_EJECT_REG 0x222C -#define VGT_CACHE_INVALIDATION 0x2231 -#define CACHE_INVALIDATION(x) ((x) << 0) -#define VC_ONLY 0 -#define TC_ONLY 1 -#define VC_AND_TC 2 -#define AUTO_INVLD_EN(x) ((x) << 6) -#define NO_AUTO 0 -#define ES_AUTO 1 -#define GS_AUTO 2 -#define ES_AND_GS_AUTO 3 #define VGT_ESGS_RING_SIZE 0x2232 #define VGT_GSVS_RING_SIZE 0x2233 @@ -1072,11 +1011,6 @@ #define VGT_TF_MEMORY_BASE 0x226E -#define CC_GC_SHADER_ARRAY_CONFIG 0x226F -#define INACTIVE_CUS_MASK 0xFFFF0000 -#define INACTIVE_CUS_SHIFT 16 -#define GC_USER_SHADER_ARRAY_CONFIG 0x2270 - #define PA_CL_ENHANCE 0x2285 #define CLIP_VTX_REORDER_ENA (1 << 0) #define NUM_CLIP_SEQ(x) ((x) << 1) @@ -1169,89 +1103,6 @@ #define ROW_SIZE_MASK 0x30000000 #define ROW_SIZE_SHIFT 28 -#define GB_TILE_MODE0 0x2644 -# define MICRO_TILE_MODE(x) ((x) << 0) -# define ADDR_SURF_DISPLAY_MICRO_TILING 0 -# define ADDR_SURF_THIN_MICRO_TILING 1 -# define ADDR_SURF_DEPTH_MICRO_TILING 2 -# define ARRAY_MODE(x) ((x) << 2) -# define ARRAY_LINEAR_GENERAL 0 -# define ARRAY_LINEAR_ALIGNED 1 -# define ARRAY_1D_TILED_THIN1 2 -# define ARRAY_2D_TILED_THIN1 4 -# define PIPE_CONFIG(x) ((x) << 6) -# define ADDR_SURF_P2 0 -# define ADDR_SURF_P4_8x16 4 -# define ADDR_SURF_P4_16x16 5 -# define ADDR_SURF_P4_16x32 6 -# define ADDR_SURF_P4_32x32 7 -# define ADDR_SURF_P8_16x16_8x16 8 -# define ADDR_SURF_P8_16x32_8x16 9 -# define ADDR_SURF_P8_32x32_8x16 10 -# define ADDR_SURF_P8_16x32_16x16 11 -# define ADDR_SURF_P8_32x32_16x16 12 -# define ADDR_SURF_P8_32x32_16x32 13 -# define ADDR_SURF_P8_32x64_32x32 14 -# define TILE_SPLIT(x) ((x) << 11) -# define ADDR_SURF_TILE_SPLIT_64B 0 -# define ADDR_SURF_TILE_SPLIT_128B 1 -# define ADDR_SURF_TILE_SPLIT_256B 2 -# define ADDR_SURF_TILE_SPLIT_512B 3 -# define ADDR_SURF_TILE_SPLIT_1KB 4 -# define ADDR_SURF_TILE_SPLIT_2KB 5 -# define ADDR_SURF_TILE_SPLIT_4KB 6 -# define BANK_WIDTH(x) ((x) << 14) -# define ADDR_SURF_BANK_WIDTH_1 0 -# define ADDR_SURF_BANK_WIDTH_2 1 -# define ADDR_SURF_BANK_WIDTH_4 2 -# define ADDR_SURF_BANK_WIDTH_8 3 -# define BANK_HEIGHT(x) ((x) << 16) -# define ADDR_SURF_BANK_HEIGHT_1 0 -# define ADDR_SURF_BANK_HEIGHT_2 1 -# define ADDR_SURF_BANK_HEIGHT_4 2 -# define ADDR_SURF_BANK_HEIGHT_8 3 -# define MACRO_TILE_ASPECT(x) ((x) << 18) -# define ADDR_SURF_MACRO_ASPECT_1 0 -# define ADDR_SURF_MACRO_ASPECT_2 1 -# define ADDR_SURF_MACRO_ASPECT_4 2 -# define ADDR_SURF_MACRO_ASPECT_8 3 -# define NUM_BANKS(x) ((x) << 20) -# define ADDR_SURF_2_BANK 0 -# define ADDR_SURF_4_BANK 1 -# define ADDR_SURF_8_BANK 2 -# define ADDR_SURF_16_BANK 3 -#define GB_TILE_MODE1 0x2645 -#define GB_TILE_MODE2 0x2646 -#define GB_TILE_MODE3 0x2647 -#define GB_TILE_MODE4 0x2648 -#define GB_TILE_MODE5 0x2649 -#define GB_TILE_MODE6 0x264a -#define GB_TILE_MODE7 0x264b -#define GB_TILE_MODE8 0x264c -#define GB_TILE_MODE9 0x264d -#define GB_TILE_MODE10 0x264e -#define GB_TILE_MODE11 0x264f -#define GB_TILE_MODE12 0x2650 -#define GB_TILE_MODE13 0x2651 -#define GB_TILE_MODE14 0x2652 -#define GB_TILE_MODE15 0x2653 -#define GB_TILE_MODE16 0x2654 -#define GB_TILE_MODE17 0x2655 -#define GB_TILE_MODE18 0x2656 -#define GB_TILE_MODE19 0x2657 -#define GB_TILE_MODE20 0x2658 -#define GB_TILE_MODE21 0x2659 -#define GB_TILE_MODE22 0x265a -#define GB_TILE_MODE23 0x265b -#define GB_TILE_MODE24 0x265c -#define GB_TILE_MODE25 0x265d -#define GB_TILE_MODE26 0x265e -#define GB_TILE_MODE27 0x265f -#define GB_TILE_MODE28 0x2660 -#define GB_TILE_MODE29 0x2661 -#define GB_TILE_MODE30 0x2662 -#define GB_TILE_MODE31 0x2663 - #define CB_PERFCOUNTER0_SELECT0 0x2688 #define CB_PERFCOUNTER0_SELECT1 0x2689 #define CB_PERFCOUNTER1_SELECT0 0x268A @@ -1263,10 +1114,6 @@ #define CB_CGTT_SCLK_CTRL 0x2698 -#define GC_USER_RB_BACKEND_DISABLE 0x26DF -#define BACKEND_DISABLE_MASK 0x00FF0000 -#define BACKEND_DISABLE_SHIFT 16 - #define TCP_CHAN_STEER_LO 0x2B03 #define TCP_CHAN_STEER_HI 0x2B94 @@ -1320,101 +1167,12 @@ # define CP_RINGID1_INT_STAT (1 << 30) # define CP_RINGID0_INT_STAT (1 << 31) -#define CP_MEM_SLP_CNTL 0x3079 -# define CP_MEM_LS_EN (1 << 0) - -#define CP_DEBUG 0x307F - -#define RLC_CNTL 0x30C0 -# define RLC_ENABLE (1 << 0) -#define RLC_RL_BASE 0x30C1 -#define RLC_RL_SIZE 0x30C2 -#define RLC_LB_CNTL 0x30C3 -# define LOAD_BALANCE_ENABLE (1 << 0) -#define RLC_SAVE_AND_RESTORE_BASE 0x30C4 -#define RLC_LB_CNTR_MAX 0x30C5 -#define RLC_LB_CNTR_INIT 0x30C6 - -#define RLC_CLEAR_STATE_RESTORE_BASE 0x30C8 - -#define RLC_UCODE_ADDR 0x30CB -#define RLC_UCODE_DATA 0x30CC - -#define RLC_GPU_CLOCK_COUNT_LSB 0x30CE -#define RLC_GPU_CLOCK_COUNT_MSB 0x30CF -#define RLC_CAPTURE_GPU_CLOCK_COUNT 0x30D0 -#define RLC_MC_CNTL 0x30D1 -#define RLC_UCODE_CNTL 0x30D2 -#define RLC_STAT 0x30D3 -# define RLC_BUSY_STATUS (1 << 0) -# define GFX_POWER_STATUS (1 << 1) -# define GFX_CLOCK_STATUS (1 << 2) -# define GFX_LS_STATUS (1 << 3) - -#define RLC_PG_CNTL 0x30D7 -# define GFX_PG_ENABLE (1 << 0) -# define GFX_PG_SRC (1 << 1) - -#define RLC_CGTT_MGCG_OVERRIDE 0x3100 -#define RLC_CGCG_CGLS_CTRL 0x3101 -# define CGCG_EN (1 << 0) -# define CGLS_EN (1 << 1) - -#define RLC_TTOP_D 0x3105 -# define RLC_PUD(x) ((x) << 0) -# define RLC_PUD_MASK (0xff << 0) -# define RLC_PDD(x) ((x) << 8) -# define RLC_PDD_MASK (0xff << 8) -# define RLC_TTPD(x) ((x) << 16) -# define RLC_TTPD_MASK (0xff << 16) -# define RLC_MSD(x) ((x) << 24) -# define RLC_MSD_MASK (0xff << 24) - -#define RLC_LB_INIT_CU_MASK 0x3107 - -#define RLC_PG_AO_CU_MASK 0x310B -#define RLC_MAX_PG_CU 0x310C -# define MAX_PU_CU(x) ((x) << 0) -# define MAX_PU_CU_MASK (0xff << 0) -#define RLC_AUTO_PG_CTRL 0x310C -# define AUTO_PG_EN (1 << 0) -# define GRBM_REG_SGIT(x) ((x) << 3) -# define GRBM_REG_SGIT_MASK (0xffff << 3) -# define PG_AFTER_GRBM_REG_ST(x) ((x) << 19) -# define PG_AFTER_GRBM_REG_ST_MASK (0x1fff << 19) - -#define RLC_SERDES_WR_MASTER_MASK_0 0x3115 -#define RLC_SERDES_WR_MASTER_MASK_1 0x3116 -#define RLC_SERDES_WR_CTRL 0x3117 - -#define RLC_SERDES_MASTER_BUSY_0 0x3119 -#define RLC_SERDES_MASTER_BUSY_1 0x311A - -#define RLC_GCPM_GENERAL_3 0x311E - -#define DB_RENDER_CONTROL 0xA000 - -#define DB_DEPTH_INFO 0xA00F - -#define PA_SC_RASTER_CONFIG 0xA0D4 -# define RB_MAP_PKR0(x) ((x) << 0) -# define RB_MAP_PKR0_MASK (0x3 << 0) -# define RB_MAP_PKR1(x) ((x) << 2) -# define RB_MAP_PKR1_MASK (0x3 << 2) -# define RASTER_CONFIG_RB_MAP_0 0 -# define RASTER_CONFIG_RB_MAP_1 1 -# define RASTER_CONFIG_RB_MAP_2 2 -# define RASTER_CONFIG_RB_MAP_3 3 +// #define PA_SC_RASTER_CONFIG 0xA0D4 # define RB_XSEL2(x) ((x) << 4) # define RB_XSEL2_MASK (0x3 << 4) # define RB_XSEL (1 << 6) # define RB_YSEL (1 << 7) # define PKR_MAP(x) ((x) << 8) -# define PKR_MAP_MASK (0x3 << 8) -# define RASTER_CONFIG_PKR_MAP_0 0 -# define RASTER_CONFIG_PKR_MAP_1 1 -# define RASTER_CONFIG_PKR_MAP_2 2 -# define RASTER_CONFIG_PKR_MAP_3 3 # define PKR_XSEL(x) ((x) << 10) # define PKR_XSEL_MASK (0x3 << 10) # define PKR_YSEL(x) ((x) << 12) @@ -1426,56 +1184,11 @@ # define SC_YSEL(x) ((x) << 20) # define SC_YSEL_MASK (0x3 << 20) # define SE_MAP(x) ((x) << 24) -# define SE_MAP_MASK (0x3 << 24) -# define RASTER_CONFIG_SE_MAP_0 0 -# define RASTER_CONFIG_SE_MAP_1 1 -# define RASTER_CONFIG_SE_MAP_2 2 -# define RASTER_CONFIG_SE_MAP_3 3 # define SE_XSEL(x) ((x) << 26) # define SE_XSEL_MASK (0x3 << 26) # define SE_YSEL(x) ((x) << 28) # define SE_YSEL_MASK (0x3 << 28) - -#define VGT_EVENT_INITIATOR 0xA2A4 -# define SAMPLE_STREAMOUTSTATS1 (1 << 0) -# define SAMPLE_STREAMOUTSTATS2 (2 << 0) -# define SAMPLE_STREAMOUTSTATS3 (3 << 0) -# define CACHE_FLUSH_TS (4 << 0) -# define CACHE_FLUSH (6 << 0) -# define CS_PARTIAL_FLUSH (7 << 0) -# define VGT_STREAMOUT_RESET (10 << 0) -# define END_OF_PIPE_INCR_DE (11 << 0) -# define END_OF_PIPE_IB_END (12 << 0) -# define RST_PIX_CNT (13 << 0) -# define VS_PARTIAL_FLUSH (15 << 0) -# define PS_PARTIAL_FLUSH (16 << 0) -# define CACHE_FLUSH_AND_INV_TS_EVENT (20 << 0) -# define ZPASS_DONE (21 << 0) -# define CACHE_FLUSH_AND_INV_EVENT (22 << 0) -# define PERFCOUNTER_START (23 << 0) -# define PERFCOUNTER_STOP (24 << 0) -# define PIPELINESTAT_START (25 << 0) -# define PIPELINESTAT_STOP (26 << 0) -# define PERFCOUNTER_SAMPLE (27 << 0) -# define SAMPLE_PIPELINESTAT (30 << 0) -# define SAMPLE_STREAMOUTSTATS (32 << 0) -# define RESET_VTX_CNT (33 << 0) -# define VGT_FLUSH (36 << 0) -# define BOTTOM_OF_PIPE_TS (40 << 0) -# define DB_CACHE_FLUSH_AND_INV (42 << 0) -# define FLUSH_AND_INV_DB_DATA_TS (43 << 0) -# define FLUSH_AND_INV_DB_META (44 << 0) -# define FLUSH_AND_INV_CB_DATA_TS (45 << 0) -# define FLUSH_AND_INV_CB_META (46 << 0) -# define CS_DONE (47 << 0) -# define PS_DONE (48 << 0) -# define FLUSH_AND_INV_CB_PIXEL_DATA (49 << 0) -# define THREAD_TRACE_START (51 << 0) -# define THREAD_TRACE_STOP (52 << 0) -# define THREAD_TRACE_FLUSH (54 << 0) -# define THREAD_TRACE_FINISH (55 << 0) - /* PIF PHY0 registers idx/data 0x8/0xc */ #define PB0_PIF_CNTL 0x10 # define LS2_EXIT_TIME(x) ((x) << 17) @@ -2036,9 +1749,6 @@ #define EVERGREEN_DATA_FORMAT 0x1ac0 # define EVERGREEN_INTERLEAVE_EN (1 << 0) -#define MC_SHARED_CHMAP__NOOFCHAN_MASK 0xf000 -#define MC_SHARED_CHMAP__NOOFCHAN__SHIFT 0xc - #define R600_D1GRPH_ARRAY_MODE_LINEAR_GENERAL (0 << 20) #define R600_D1GRPH_ARRAY_MODE_LINEAR_ALIGNED (1 << 20) #define R600_D1GRPH_ARRAY_MODE_1D_TILED_THIN1 (2 << 20) @@ -2050,32 +1760,6 @@ #define R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1847 #define R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a47 -#define DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK 0x8 -#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK 0x8 -#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK 0x8 -#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK 0x8 -#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK 0x8 -#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK 0x8 - -#define DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK 0x4 -#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK 0x4 -#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK 0x4 -#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK 0x4 -#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK 0x4 -#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK 0x4 - -#define DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK 0x20000 -#define DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK 0x20000 -#define DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK 0x20000 -#define DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK 0x20000 -#define DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK 0x20000 -#define DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK 0x20000 - -#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK 0x1 -#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK 0x100 - -#define DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK 0x1 - #define R600_D1GRPH_SWAP_CONTROL 0x1843 #define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0) #define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0) @@ -2099,8 +1783,6 @@ # define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28 # define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28) -#define GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK 0x1 - #define FMT_BIT_DEPTH_CONTROL 0x1bf2 #define FMT_TRUNCATE_EN (1 << 0) #define FMT_TRUNCATE_DEPTH (1 << 4) @@ -2404,19 +2086,6 @@ #define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800 #define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC__SHIFT 0xb -#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8 -#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x3 -#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40 -#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x6 -#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x200 -#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x9 -#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x1000 -#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xc -#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8000 -#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xf -#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40000 -#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x12 - #define MC_SEQ_MISC0__MT__MASK 0xf0000000 #define MC_SEQ_MISC0__MT__GDDR1 0x10000000 #define MC_SEQ_MISC0__MT__DDR2 0x20000000 @@ -2426,10 +2095,7 @@ #define MC_SEQ_MISC0__MT__HBM 0x60000000 #define MC_SEQ_MISC0__MT__DDR3 0xB0000000 -#define GRBM_STATUS__GUI_ACTIVE_MASK 0x80000000 #define CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK 0x4000000 -#define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK 0x800000 -#define CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK 0x400000 #define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12) #define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) #define PACKET3_SEM_SEL_WAIT (0x7 << 29) From 760632fa2e3dbb13a9b55acbb960592628d274dd Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Sun, 9 Mar 2025 12:48:51 -0400 Subject: [PATCH 34/68] drm/amdgpu: fix SI's GB_ADDR_CONFIG_GOLDEN values and wire up sid.h in GFX6 By wiring up sid.h in GFX6, we end up with a few duplicated defines such as the golden registers. Let's clean this up. [TAHITI,VERDE, HAINAN]_GB_ADDR_CONFIG_GOLDEN were defined both in sid.h and under si_enums.h, with different values. Keep the values used under radeon and move them under gfx_v6_0.c where they are used (as it is done under cik) Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 15 +++++++++++++-- drivers/gpu/drm/amd/amdgpu/si_enums.h | 6 ------ drivers/gpu/drm/amd/amdgpu/sid.h | 4 ---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 2f5cf87ede88..13fbee46417a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -28,19 +28,30 @@ #include "amdgpu_gfx.h" #include "amdgpu_ucode.h" #include "clearstate_si.h" +#include "si.h" +#include "sid.h" + #include "bif/bif_3_0_d.h" #include "bif/bif_3_0_sh_mask.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" + #include "gca/gfx_6_0_d.h" #include "gca/gfx_6_0_sh_mask.h" +#include "gca/gfx_7_2_enum.h" + #include "gmc/gmc_6_0_d.h" #include "gmc/gmc_6_0_sh_mask.h" + #include "dce/dce_6_0_d.h" #include "dce/dce_6_0_sh_mask.h" -#include "gca/gfx_7_2_enum.h" + #include "si_enums.h" -#include "si.h" + +#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003 +#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002 +#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001 static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index 4e935baa7b91..cd9b0a654991 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -128,8 +128,6 @@ #define SI_CRTC4_REGISTER_OFFSET 0x2c00 #define SI_CRTC5_REGISTER_OFFSET 0x2f00 -#define DMA0_REGISTER_OFFSET 0x000 -#define DMA1_REGISTER_OFFSET 0x200 #define ES_AND_GS_AUTO 3 #define RADEON_PACKET_TYPE3 3 #define CE_PARTITION_BASE 3 @@ -161,10 +159,6 @@ #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D -#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003 -#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x02010002 -#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02011003 - #define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \ (((op) & 0xFF) << 8) | \ ((n) & 0x3FFF) << 16) diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 2218fd99ee83..bf228a1dedff 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -26,10 +26,6 @@ #define TAHITI_RB_BITMAP_WIDTH_PER_SH 2 -#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003 -#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002 -#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001 - #define SI_MAX_SH_GPRS 256 #define SI_MAX_TEMP_GPRS 16 #define SI_MAX_SH_THREADS 256 From 42c854b8fb0cce512534aa2b7141948e80c6ebb0 Mon Sep 17 00:00:00 2001 From: Yifan Zha Date: Wed, 5 Mar 2025 13:14:55 +0800 Subject: [PATCH 35/68] drm/amd/amdkfd: Evict all queues even HWS remove queue failed [Why] If reset is detected and kfd need to evict working queues, HWS moving queue will be failed. Then remaining queues are not evicted and in active state. After reset done, kfd uses HWS to termination remaining activated queues but HWS is resetted. So remove queue will be failed again. [How] Keep removing all queues even if HWS returns failed. It will not affect cpsch as it checks reset_domain->sem. v2: If any queue failed, evict queue returns error. v3: Declare err inside the if-block. Reviewed-by: Felix Kuehling Signed-off-by: Yifan Zha Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 885e0e9cf21b..2ed003d3ff0e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1221,11 +1221,13 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, decrement_queue_count(dqm, qpd, q); if (dqm->dev->kfd->shared_resources.enable_mes) { - retval = remove_queue_mes(dqm, q, qpd); - if (retval) { + int err; + + err = remove_queue_mes(dqm, q, qpd); + if (err) { dev_err(dev, "Failed to evict queue %d\n", q->properties.queue_id); - goto out; + retval = err; } } } From 37c890d83161ff725a735d02afc52a021caaf7d6 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Sun, 9 Mar 2025 12:48:52 -0400 Subject: [PATCH 36/68] drm/amdgpu: finish wiring up sid.h in DCE6 For coherence with DCE8 et DCE10, add or move some values under sid.h and remove duplicated from si_enums.h. Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 63 ++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/si_enums.h | 6 --- drivers/gpu/drm/amd/amdgpu/sid.h | 29 +++++++++--- 3 files changed, 55 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 5fcb1a277f75..255c70959343 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -41,6 +41,7 @@ #include "amdgpu_display.h" #include "dce_v6_0.h" +#include "sid.h" #include "bif/bif_3_0_d.h" #include "bif/bif_3_0_sh_mask.h" @@ -65,31 +66,31 @@ static void dce_v6_0_set_irq_funcs(struct amdgpu_device *adev); static const u32 crtc_offsets[6] = { - SI_CRTC0_REGISTER_OFFSET, - SI_CRTC1_REGISTER_OFFSET, - SI_CRTC2_REGISTER_OFFSET, - SI_CRTC3_REGISTER_OFFSET, - SI_CRTC4_REGISTER_OFFSET, - SI_CRTC5_REGISTER_OFFSET + CRTC0_REGISTER_OFFSET, + CRTC1_REGISTER_OFFSET, + CRTC2_REGISTER_OFFSET, + CRTC3_REGISTER_OFFSET, + CRTC4_REGISTER_OFFSET, + CRTC5_REGISTER_OFFSET }; static const u32 hpd_offsets[] = { - mmDC_HPD1_INT_STATUS - mmDC_HPD1_INT_STATUS, - mmDC_HPD2_INT_STATUS - mmDC_HPD1_INT_STATUS, - mmDC_HPD3_INT_STATUS - mmDC_HPD1_INT_STATUS, - mmDC_HPD4_INT_STATUS - mmDC_HPD1_INT_STATUS, - mmDC_HPD5_INT_STATUS - mmDC_HPD1_INT_STATUS, - mmDC_HPD6_INT_STATUS - mmDC_HPD1_INT_STATUS, + HPD0_REGISTER_OFFSET, + HPD1_REGISTER_OFFSET, + HPD2_REGISTER_OFFSET, + HPD3_REGISTER_OFFSET, + HPD4_REGISTER_OFFSET, + HPD5_REGISTER_OFFSET }; static const uint32_t dig_offsets[] = { - SI_CRTC0_REGISTER_OFFSET, - SI_CRTC1_REGISTER_OFFSET, - SI_CRTC2_REGISTER_OFFSET, - SI_CRTC3_REGISTER_OFFSET, - SI_CRTC4_REGISTER_OFFSET, - SI_CRTC5_REGISTER_OFFSET, + CRTC0_REGISTER_OFFSET, + CRTC1_REGISTER_OFFSET, + CRTC2_REGISTER_OFFSET, + CRTC3_REGISTER_OFFSET, + CRTC4_REGISTER_OFFSET, + CRTC5_REGISTER_OFFSET, (0x13830 - 0x7030) >> 2, }; @@ -1395,13 +1396,13 @@ static void dce_v6_0_audio_enable(struct amdgpu_device *adev, static const u32 pin_offsets[7] = { - (0x1780 - 0x1780), - (0x1786 - 0x1780), - (0x178c - 0x1780), - (0x1792 - 0x1780), - (0x1798 - 0x1780), - (0x179d - 0x1780), - (0x17a4 - 0x1780), + AUD0_REGISTER_OFFSET, + AUD1_REGISTER_OFFSET, + AUD2_REGISTER_OFFSET, + AUD3_REGISTER_OFFSET, + AUD4_REGISTER_OFFSET, + AUD5_REGISTER_OFFSET, + AUD6_REGISTER_OFFSET, }; static int dce_v6_0_audio_init(struct amdgpu_device *adev) @@ -2960,22 +2961,22 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, switch (crtc) { case 0: - reg_block = SI_CRTC0_REGISTER_OFFSET; + reg_block = CRTC0_REGISTER_OFFSET; break; case 1: - reg_block = SI_CRTC1_REGISTER_OFFSET; + reg_block = CRTC1_REGISTER_OFFSET; break; case 2: - reg_block = SI_CRTC2_REGISTER_OFFSET; + reg_block = CRTC2_REGISTER_OFFSET; break; case 3: - reg_block = SI_CRTC3_REGISTER_OFFSET; + reg_block = CRTC3_REGISTER_OFFSET; break; case 4: - reg_block = SI_CRTC4_REGISTER_OFFSET; + reg_block = CRTC4_REGISTER_OFFSET; break; case 5: - reg_block = SI_CRTC5_REGISTER_OFFSET; + reg_block = CRTC5_REGISTER_OFFSET; break; default: DRM_DEBUG("invalid crtc %d\n", crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index cd9b0a654991..d656ef1fa6e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -121,12 +121,6 @@ #define CURSOR_UPDATE_LOCK (1 << 16) #define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) -#define SI_CRTC0_REGISTER_OFFSET 0 -#define SI_CRTC1_REGISTER_OFFSET 0x300 -#define SI_CRTC2_REGISTER_OFFSET 0x2600 -#define SI_CRTC3_REGISTER_OFFSET 0x2900 -#define SI_CRTC4_REGISTER_OFFSET 0x2c00 -#define SI_CRTC5_REGISTER_OFFSET 0x2f00 #define ES_AND_GS_AUTO 3 #define RADEON_PACKET_TYPE3 3 diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index bf228a1dedff..cbf232f5235b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -1700,12 +1700,29 @@ //#dce stupp /* display controller offsets used for crtc/cur/lut/grph/viewport/etc. */ -#define SI_CRTC0_REGISTER_OFFSET 0 //(0x6df0 - 0x6df0)/4 -#define SI_CRTC1_REGISTER_OFFSET 0x300 //(0x79f0 - 0x6df0)/4 -#define SI_CRTC2_REGISTER_OFFSET 0x2600 //(0x105f0 - 0x6df0)/4 -#define SI_CRTC3_REGISTER_OFFSET 0x2900 //(0x111f0 - 0x6df0)/4 -#define SI_CRTC4_REGISTER_OFFSET 0x2c00 //(0x11df0 - 0x6df0)/4 -#define SI_CRTC5_REGISTER_OFFSET 0x2f00 //(0x129f0 - 0x6df0)/4 +#define CRTC0_REGISTER_OFFSET (0x1b7c - 0x1b7c) //(0x6df0 - 0x6df0)/4 +#define CRTC1_REGISTER_OFFSET (0x1e7c - 0x1b7c) //(0x79f0 - 0x6df0)/4 +#define CRTC2_REGISTER_OFFSET (0x417c - 0x1b7c) //(0x105f0 - 0x6df0)/4 +#define CRTC3_REGISTER_OFFSET (0x447c - 0x1b7c) //(0x111f0 - 0x6df0)/4 +#define CRTC4_REGISTER_OFFSET (0x477c - 0x1b7c) //(0x11df0 - 0x6df0)/4 +#define CRTC5_REGISTER_OFFSET (0x4a7c - 0x1b7c) //(0x129f0 - 0x6df0)/4 + +/* hpd instance offsets */ +#define HPD0_REGISTER_OFFSET (0x1807 - 0x1807) +#define HPD1_REGISTER_OFFSET (0x180a - 0x1807) +#define HPD2_REGISTER_OFFSET (0x180d - 0x1807) +#define HPD3_REGISTER_OFFSET (0x1810 - 0x1807) +#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807) +#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807) + +/* audio endpt instance offsets */ +#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780) +#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780) +#define AUD2_REGISTER_OFFSET (0x178c - 0x1780) +#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780) +#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780) +#define AUD5_REGISTER_OFFSET (0x179d - 0x1780) +#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780) #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 From 3e3fcd29b505cebed659311337ea03b7698767fc Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Mon, 10 Mar 2025 18:08:05 +0100 Subject: [PATCH 37/68] drm/amdgpu: NULL-check BO's backing store when determining GFX12 PTE flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PRT BOs may not have any backing store, so bo->tbo.resource will be NULL. Check for that before dereferencing. Fixes: 0cce5f285d9a ("drm/amdkfd: Check correct memory types for is_system variable") Reviewed-by: Christian König Signed-off-by: Natalie Vock Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index ea7c32d8380b..bf8d01da8815 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -528,8 +528,9 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; - is_system = (bo->tbo.resource->mem_type == TTM_PL_TT) || - (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); + is_system = bo->tbo.resource && + (bo->tbo.resource->mem_type == TTM_PL_TT || + bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) *flags |= AMDGPU_PTE_DCC; From 2d52de55f9ee7aaee0e09ac443f77855989c6b68 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Tue, 11 Mar 2025 14:14:59 +0300 Subject: [PATCH 38/68] drm/radeon: fix uninitialized size issue in radeon_vce_cs_parse() On the off chance that command stream passed from userspace via ioctl() call to radeon_vce_cs_parse() is weirdly crafted and first command to execute is to encode (case 0x03000001), the function in question will attempt to call radeon_vce_cs_reloc() with size argument that has not been properly initialized. Specifically, 'size' will point to 'tmp' variable before the latter had a chance to be assigned any value. Play it safe and init 'tmp' with 0, thus ensuring that radeon_vce_cs_reloc() will catch an early error in cases like these. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 2fc5703abda2 ("drm/radeon: check VCE relocation buffer range v3") Signed-off-by: Nikita Zhandarovich Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_vce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index d1871af967d4..2355a78e1b69 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -557,7 +557,7 @@ int radeon_vce_cs_parse(struct radeon_cs_parser *p) { int session_idx = -1; bool destroyed = false, created = false, allocated = false; - uint32_t tmp, handle = 0; + uint32_t tmp = 0, handle = 0; uint32_t *size = &tmp; int i, r = 0; From 1a0807feb97082bff2b1342dbbe55a2a9a8bdb88 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Fri, 28 Feb 2025 13:32:46 +0100 Subject: [PATCH 39/68] drm/amdgpu: Fix MPEG2, MPEG4 and VC1 video caps max size 1920x1088 is the maximum supported resolution. Signed-off-by: David Rosca Acked-by: Alex Deucher Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 18 +++++++-------- drivers/gpu/drm/amd/amdgpu/soc15.c | 18 +++++++-------- drivers/gpu/drm/amd/amdgpu/vi.c | 36 +++++++++++++++--------------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 8068f384f56c..a18e6fb9fa3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -78,10 +78,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode = { /* Navi1x */ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -104,10 +104,10 @@ static const struct amdgpu_video_codecs sc_video_codecs_encode = { }; static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -115,10 +115,10 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] }; static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8732f766947e..c5a752d8aee5 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -102,10 +102,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode = /* Vega */ static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, }; @@ -119,10 +119,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode = /* Raven */ static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, @@ -137,10 +137,10 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode = /* Renoir, Arcturus */ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 3bbbb75242d9..9b3510e53112 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -167,16 +167,16 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] = { { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 3, }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 5, }, { @@ -188,9 +188,9 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] = }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 4, }, }; @@ -206,16 +206,16 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = { { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 3, }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 5, }, { @@ -227,9 +227,9 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 4, }, { From 6e0d2fde3ae8fdb5b47e10389f23ed2cb4daec5d Mon Sep 17 00:00:00 2001 From: David Rosca Date: Fri, 28 Feb 2025 13:34:49 +0100 Subject: [PATCH 40/68] drm/amdgpu: Fix JPEG video caps max size for navi1x and raven 8192x8192 is the maximum supported resolution. Signed-off-by: David Rosca Acked-by: Alex Deucher Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 2 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index a18e6fb9fa3f..32b824c3658f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -83,7 +83,7 @@ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index c5a752d8aee5..d6db75258e66 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -124,7 +124,7 @@ static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] = {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, }; From 0a6e7b06bdbead2e43d56a2274b7e0c9c86d536e Mon Sep 17 00:00:00 2001 From: David Rosca Date: Fri, 28 Feb 2025 14:12:10 +0100 Subject: [PATCH 41/68] drm/amdgpu: Remove JPEG from vega and carrizo video caps JPEG is only supported for VCN1+. Signed-off-by: David Rosca Acked-by: Alex Deucher Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 1 - drivers/gpu/drm/amd/amdgpu/vi.c | 7 ------- 2 files changed, 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index d6db75258e66..659eab9b90be 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -107,7 +107,6 @@ static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] = {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, }; static const struct amdgpu_video_codecs vega_video_codecs_decode = diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 9b3510e53112..86d8bc10d90a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -239,13 +239,6 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = .max_pixels_per_frame = 4096 * 4096, .max_level = 186, }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, }; static const struct amdgpu_video_codecs cz_video_codecs_decode = From 19478f2011f8b53dee401c91423c4e0b73753e4f Mon Sep 17 00:00:00 2001 From: David Rosca Date: Fri, 28 Feb 2025 13:44:32 +0100 Subject: [PATCH 42/68] drm/amdgpu: Update SRIOV video codec caps There have been multiple fixes to the video caps that are missing for SRIOV. Update the SRIOV caps with correct values. Signed-off-by: David Rosca Acked-by: Alex Deucher Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 16 ++++++++-------- drivers/gpu/drm/amd/amdgpu/soc21.c | 10 ++-------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 32b824c3658f..50e77d9b30af 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -141,23 +141,23 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = { }; static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index dd5d04c068f9..ad36c96478a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -117,23 +117,17 @@ static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = { }; static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; From 16fbc18cb07470cd33fb5f37ad181b51583e6dc0 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Tue, 11 Mar 2025 14:15:18 -0400 Subject: [PATCH 43/68] drm/amd/pm: add unique_id for gfx12 Expose unique_id for gfx12 Signed-off-by: Harish Kasiviswanathan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 1d04f1b79ded..2179344e78d9 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2341,6 +2341,8 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): *states = ATTR_STATE_SUPPORTED; break; default: From b9e75bcb2b39e1202364d958ee4f27fd8a6f1313 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 6 Feb 2025 17:33:46 +0530 Subject: [PATCH 44/68] drm/amdgpu: Remove unsupported xgmi versions XGMI v4.8.0 is not used in any SOCs. Remove the associated functions. Also, ensure get_xgmi_info callback pointer is not NULL before calling the function. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 3 -- drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 37 ------------------- 2 files changed, 40 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a4258127083d..967a992829bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2772,9 +2772,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; } - if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0)) - adev->gmc.xgmi.supported = true; - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 17509f32f61a..deb95fab02df 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -505,42 +505,6 @@ static void gfxhub_v2_1_init(struct amdgpu_device *adev) hub->vmhub_funcs = &gfxhub_v2_1_vmhub_funcs; } -static int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev) -{ - u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_CNTL); - u32 max_region = - REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); - u32 max_num_physical_nodes = 0; - u32 max_physical_node_id = 0; - - switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { - case IP_VERSION(4, 8, 0): - max_num_physical_nodes = 4; - max_physical_node_id = 3; - break; - default: - return -EINVAL; - } - - /* PF_MAX_REGION=0 means xgmi is disabled */ - if (max_region) { - adev->gmc.xgmi.num_physical_nodes = max_region + 1; - if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) - return -EINVAL; - - adev->gmc.xgmi.physical_node_id = - REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION); - if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) - return -EINVAL; - - adev->gmc.xgmi.node_segment_size = REG_GET_FIELD( - RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_SIZE), - GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; - } - - return 0; -} - static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev) { int i; @@ -696,7 +660,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .gart_disable = gfxhub_v2_1_gart_disable, .set_fault_enable_default = gfxhub_v2_1_set_fault_enable_default, .init = gfxhub_v2_1_init, - .get_xgmi_info = gfxhub_v2_1_get_xgmi_info, .utcl2_harvest = gfxhub_v2_1_utcl2_harvest, .mode2_save_regs = gfxhub_v2_1_save_regs, .mode2_restore_regs = gfxhub_v2_1_restore_regs, From f844732e3ad9c4b78df7436232949b8d2096d1a6 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Thu, 6 Mar 2025 08:40:01 +0800 Subject: [PATCH 45/68] drm/amdgpu: Fix the race condition for draining retry fault Issue: In the scenario where svm_range_restore_pages is called, but svm->checkpoint_ts has not been set and the retry fault has not been drained, svm_range_unmap_from_cpu is triggered and calls svm_range_free. Meanwhile, svm_range_restore_pages continues execution and reaches svm_range_from_addr. This results in a "failed to find prange..." error, causing the page recovery to fail. How to fix: Move the timestamp check code under the protection of svm->lock. v2: Make sure all right locks are released before go out. v3: Directly goto out_unlock_svms, and return -EAGAIN. v4: Refine code. Signed-off-by: Emily Deng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 31 +++++++++++++++------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 1a38ac75abbd..63eeef2a75ba 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3009,19 +3009,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - /* check if this page fault time stamp is before svms->checkpoint_ts */ - if (svms->checkpoint_ts[gpuidx] != 0) { - if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) { - pr_debug("draining retry fault, drop fault 0x%llx\n", addr); - r = 0; - goto out; - } else - /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts - * to zero to avoid following ts wrap around give wrong comparing - */ - svms->checkpoint_ts[gpuidx] = 0; - } - if (!p->xnack_enabled) { pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); r = -EFAULT; @@ -3041,6 +3028,21 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, mmap_read_lock(mm); retry_write_locked: mutex_lock(&svms->lock); + + /* check if this page fault time stamp is before svms->checkpoint_ts */ + if (svms->checkpoint_ts[gpuidx] != 0) { + if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) { + pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + r = -EAGAIN; + goto out_unlock_svms; + } else { + /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts + * to zero to avoid following ts wrap around give wrong comparing + */ + svms->checkpoint_ts[gpuidx] = 0; + } + } + prange = svm_range_from_addr(svms, addr, NULL); if (!prange) { pr_debug("failed to find prange svms 0x%p address [0x%llx]\n", @@ -3166,7 +3168,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, mutex_unlock(&svms->lock); mmap_read_unlock(mm); - svm_range_count_fault(node, p, gpuidx); + if (r != -EAGAIN) + svm_range_count_fault(node, p, gpuidx); mmput(mm); out: From e27b36ea6ba5f29e91fcfb375ea29503708fcf43 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 15:55:33 -0500 Subject: [PATCH 46/68] drm/amdgpu/gfx11: don't read registers in mqd init Just use the default values. There's not need to get the value from hardware and it could cause problems if we do that at runtime and gfxoff is active. Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 47 ++++++++++++++++++-------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 2b3ba404955d..b9f6d89dafb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -62,6 +62,23 @@ #define regPC_CONFIG_CNTL_1 0x194d #define regPC_CONFIG_CNTL_1_BASE_IDX 1 +#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01 +#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000 +#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000 + +#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 +#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501 +#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000 + MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); @@ -3965,7 +3982,7 @@ static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) priority = 1; - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); + tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); mqd->cp_gfx_hqd_queue_priority = tmp; } @@ -3987,14 +4004,14 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set up mqd control */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); + tmp = regCP_GFX_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); mqd->cp_gfx_mqd_control = tmp; /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); + tmp = regCP_GFX_HQD_VMID_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); mqd->cp_gfx_hqd_vmid = 0; @@ -4002,7 +4019,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); /* set up time quantum */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); + tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); mqd->cp_gfx_hqd_quantum = tmp; @@ -4024,7 +4041,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ rb_bufsz = order_base_2(prop->queue_size / 4) - 1; - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); + tmp = regCP_GFX_HQD_CNTL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); #ifdef __BIG_ENDIAN @@ -4033,7 +4050,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ - tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); + tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -4045,7 +4062,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_rb_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); + mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; /* active the queue */ mqd->cp_gfx_hqd_active = 1; @@ -4131,14 +4148,14 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); + tmp = regCP_HQD_EOP_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; /* enable doorbell? */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -4167,7 +4184,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set MQD vmid to 0 */ - tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); + tmp = regCP_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); mqd->cp_mqd_control = tmp; @@ -4177,7 +4194,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); + tmp = regCP_HQD_PQ_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, @@ -4203,7 +4220,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = 0; /* enable the doorbell if requested */ if (prop->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -4218,17 +4235,17 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); + mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); + tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); mqd->cp_hqd_persistent_state = tmp; /* set MIN_IB_AVAIL_SIZE */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); + tmp = regCP_HQD_IB_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; From fc3c139cf0432b79fd08e23100a559ee51cd0be4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Feb 2025 16:08:03 -0500 Subject: [PATCH 47/68] drm/amdgpu/gfx12: don't read registers in mqd init Just use the default values. There's not need to get the value from hardware and it could cause problems if we do that at runtime and gfxoff is active. Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 48 ++++++++++++++++++-------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 926fb536bbff..87dc4195192c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -50,6 +50,24 @@ #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01 +#define regCP_GFX_HQD_CNTL_DEFAULT 0x00f00000 +#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000 + +#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 +#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501 +#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000 + + MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin"); @@ -2891,25 +2909,25 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set up mqd control */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); + tmp = regCP_GFX_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); mqd->cp_gfx_mqd_control = tmp; /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); + tmp = regCP_GFX_HQD_VMID_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); mqd->cp_gfx_hqd_vmid = 0; /* set up default queue priority level * 0x0 = low priority, 0x1 = high priority */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); + tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); mqd->cp_gfx_hqd_queue_priority = tmp; /* set up time quantum */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); + tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); mqd->cp_gfx_hqd_quantum = tmp; @@ -2931,7 +2949,7 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ rb_bufsz = order_base_2(prop->queue_size / 4) - 1; - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); + tmp = regCP_GFX_HQD_CNTL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); #ifdef __BIG_ENDIAN @@ -2940,7 +2958,7 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ - tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); + tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -2952,7 +2970,7 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_rb_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); + mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; /* active the queue */ mqd->cp_gfx_hqd_active = 1; @@ -3047,14 +3065,14 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); + tmp = regCP_HQD_EOP_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; /* enable doorbell? */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -3083,7 +3101,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set MQD vmid to 0 */ - tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); + tmp = regCP_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); mqd->cp_mqd_control = tmp; @@ -3093,7 +3111,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); + tmp = regCP_HQD_PQ_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, @@ -3118,7 +3136,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = 0; /* enable the doorbell if requested */ if (prop->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -3133,17 +3151,17 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); + mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); + tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); mqd->cp_hqd_persistent_state = tmp; /* set MIN_IB_AVAIL_SIZE */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); + tmp = regCP_HQD_IB_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; From 02fc2f3c468d560c1daa17a55a88f33dc238f7ae Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Tue, 11 Mar 2025 18:17:33 +0800 Subject: [PATCH 48/68] drm/amd/pm: Update feature list for smu_v13_0_12 Update feature list for smu_v13_0_12 to show vcn & smu deep sleep feature enable status. Signed-off-by: Asad Kamal Reviewed-by: Hawking Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 4 +++- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 9c8468fb203a..c9dee09395e3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -356,6 +356,7 @@ enum smu_clk_type { __SMU_DUMMY_MAP(DS_FCLK), \ __SMU_DUMMY_MAP(DS_MP1CLK), \ __SMU_DUMMY_MAP(DS_MP0CLK), \ + __SMU_DUMMY_MAP(DS_MPIOCLK), \ __SMU_DUMMY_MAP(XGMI_PER_LINK_PWR_DWN), \ __SMU_DUMMY_MAP(DPM_GFX_PACE), \ __SMU_DUMMY_MAP(MEM_VDDCI_SCALING), \ @@ -452,7 +453,8 @@ enum smu_clk_type { __SMU_DUMMY_MAP(APT_PF_DCS), \ __SMU_DUMMY_MAP(GFX_EDC_XVMIN), \ __SMU_DUMMY_MAP(GFX_DIDT_XVMIN), \ - __SMU_DUMMY_MAP(FAN_ABNORMAL), + __SMU_DUMMY_MAP(FAN_ABNORMAL), \ + __SMU_DUMMY_MAP(PIT), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(feature) SMU_FEATURE_##feature##_BIT diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index 285dbfe10303..51078accc0f8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -76,6 +76,11 @@ const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[SMU_FEATURE_COUNT] = SMU_13_0_12_FEA_MAP(SMU_FEATURE_THERMAL_BIT, FEATURE_THERMAL), SMU_13_0_12_FEA_MAP(SMU_FEATURE_SOC_PCC_BIT, FEATURE_SOC_PCC), SMU_13_0_12_FEA_MAP(SMU_FEATURE_XGMI_PER_LINK_PWR_DWN_BIT, FEATURE_XGMI_PER_LINK_PWR_DOWN), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_DS_VCN_BIT, FEATURE_DS_VCN), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_DS_MP1CLK_BIT, FEATURE_DS_MP1CLK), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_DS_MPIOCLK_BIT, FEATURE_DS_MPIOCLK), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_DS_MP0CLK_BIT, FEATURE_DS_MP0CLK), + SMU_13_0_12_FEA_MAP(SMU_FEATURE_PIT_BIT, FEATURE_PIT), }; // clang-format off From 8a7820c07224bdae1ba704f295f9654cc2ed6691 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Tue, 25 Feb 2025 15:50:30 -0500 Subject: [PATCH 49/68] drm/amdgpu: Reduce dequeue retry timeout for gfx9 family Dequeue retry timeout controls the interval between checks for unmet conditions. On MI series, reduce this from 0x40 to 0x1 (~ 1 uS). The cost of additional bandwidth consumed by CP when polling memory shouldn't be substantial. Signed-off-by: Harish Kasiviswanathan Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 2 +- .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 4 +- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 28 ++++++------ .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 5 ++- .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 28 ++++++------ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 5 ++- .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 43 +++++++++++++------ .../gpu/drm/amd/include/kgd_kfd_interface.h | 5 ++- 10 files changed, 72 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index 6e861d08d044..7e9f7a280c1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -189,7 +189,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .set_address_watch = kgd_gfx_aldebaran_set_address_watch, .clear_address_watch = kgd_gfx_v9_clear_address_watch, .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, - .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, .hqd_reset = kgd_gfx_v9_hqd_reset, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index c820418e8ccd..ffbaa8bc5eea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -415,7 +415,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .set_address_watch = kgd_gfx_v9_set_address_watch, .clear_address_watch = kgd_gfx_v9_clear_address_watch, .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, - .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 0c0998477598..89a45a9218f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -541,8 +541,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, - .build_grace_period_packet_info = - kgd_gfx_v9_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = + kgd_gfx_v9_build_dequeue_wait_counts_packet_info, .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, .enable_debug_trap = kgd_aldebaran_enable_debug_trap, .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 2887b6f3eaa2..04ef0ca10541 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -1021,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); } -void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, +void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, uint32_t wait_times, - uint32_t grace_period, + uint32_t sch_wave, + uint32_t que_sleep, uint32_t *reg_offset, uint32_t *reg_data) { *reg_data = wait_times; - /* - * The CP cannont handle a 0 grace period input and will result in - * an infinite grace period being set so set to 1 to prevent this. - */ - if (grace_period == 0) - grace_period = 1; - - *reg_data = REG_SET_FIELD(*reg_data, - CP_IQ_WAIT_TIME2, - SCH_WAVE, - grace_period); + if (sch_wave) + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + SCH_WAVE, + sch_wave); + if (que_sleep) + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + QUE_SLEEP, + que_sleep); *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); } @@ -1115,7 +1115,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .set_address_watch = kgd_gfx_v10_set_address_watch, .clear_address_watch = kgd_gfx_v10_clear_address_watch, .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, - .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info, .program_trap_handler_settings = program_trap_handler_settings, .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, .hqd_reset = kgd_gfx_v10_hqd_reset, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index db577c2a847a..a4c607c88178 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -51,9 +51,10 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times, uint32_t inst); -void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, +void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, uint32_t wait_times, - uint32_t grace_period, + uint32_t sch_wave, + uint32_t que_sleep, uint32_t *reg_offset, uint32_t *reg_data); uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index ac9ad505f9d7..6d08bc2781a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -673,7 +673,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, .program_trap_handler_settings = program_trap_handler_settings_v10_3, .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, - .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info, .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, .disable_debug_trap = kgd_gfx_v10_disable_debug_trap, .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 84135eb90660..088d09cc7a72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1077,25 +1077,25 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, adev->gfx.cu_info.max_waves_per_simd; } -void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, +void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, uint32_t wait_times, - uint32_t grace_period, + uint32_t sch_wave, + uint32_t que_sleep, uint32_t *reg_offset, uint32_t *reg_data) { *reg_data = wait_times; - /* - * The CP cannot handle a 0 grace period input and will result in - * an infinite grace period being set so set to 1 to prevent this. - */ - if (grace_period == 0) - grace_period = 1; - - *reg_data = REG_SET_FIELD(*reg_data, - CP_IQ_WAIT_TIME2, - SCH_WAVE, - grace_period); + if (sch_wave) + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + SCH_WAVE, + sch_wave); + if (que_sleep) + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + QUE_SLEEP, + que_sleep); *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); } @@ -1255,7 +1255,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .set_address_watch = kgd_gfx_v9_set_address_watch, .clear_address_watch = kgd_gfx_v9_clear_address_watch, .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, - .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 90c8fa13d519..704452ca62f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -97,9 +97,10 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev, void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times, uint32_t inst); -void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, +void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, uint32_t wait_times, - uint32_t grace_period, + uint32_t sch_wave, + uint32_t que_sleep, uint32_t *reg_offset, uint32_t *reg_data); uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index b9c611b249e6..d440df602393 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -298,13 +298,14 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, } static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm, - uint32_t sch_value, uint32_t *reg_offset, + uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset, uint32_t *reg_data) { - pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( + pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info( pm->dqm->dev->adev, pm->dqm->wait_times, sch_value, + que_sleep, reg_offset, reg_data); } @@ -319,27 +320,43 @@ static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, uint32_t reg_data = 0; switch (cmd) { - case KFD_DEQUEUE_WAIT_INIT: - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ - if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && - (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) - pm_build_dequeue_wait_counts_packet_info(pm, 1, ®_offset, ®_data); - else + case KFD_DEQUEUE_WAIT_INIT: { + uint32_t sch_wave = 0, que_sleep = 0; + /* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40. + * On a 1GHz machine this is roughly 1 microsecond, which is + * about how long it takes to load data out of memory during + * queue connect + * QUE_SLEEP: Wait Count for Dequeue Retry. + */ + if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) && + KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) { + que_sleep = 1; + + /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ + if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && + (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) + sch_wave = 1; + } else { return 0; + } + pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep, + ®_offset, ®_data); + break; + } case KFD_DEQUEUE_WAIT_RESET: - /* function called only to get reg_offset */ - pm_build_dequeue_wait_counts_packet_info(pm, 0, ®_offset, ®_data); - reg_data = pm->dqm->wait_times; + /* reg_data would be set to dqm->wait_times */ + pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, ®_offset, ®_data); break; case KFD_DEQUEUE_WAIT_SET_SCH_WAVE: /* The CP cannot handle value 0 and it will result in - * an infinite grace period being set so set to 1 to prevent this. + * an infinite grace period being set so set to 1 to prevent this. Also + * avoid debugger API breakage as it sets 0 and expects a low value. */ if (!value) value = 1; - pm_build_dequeue_wait_counts_packet_info(pm, value, ®_offset, ®_data); + pm_build_dequeue_wait_counts_packet_info(pm, value, 0, ®_offset, ®_data); break; default: pr_err("Invalid dequeue wait cmd\n"); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 1e8dfa6c0dc8..9aba8596faa7 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -313,9 +313,10 @@ struct kfd2kgd_calls { void (*get_iq_wait_times)(struct amdgpu_device *adev, uint32_t *wait_times, uint32_t inst); - void (*build_grace_period_packet_info)(struct amdgpu_device *adev, + void (*build_dequeue_wait_counts_packet_info)(struct amdgpu_device *adev, uint32_t wait_times, - uint32_t grace_period, + uint32_t sch_wave, + uint32_t que_sleep, uint32_t *reg_offset, uint32_t *reg_data); void (*get_cu_occupancy)(struct amdgpu_device *adev, From 90df6db62fa78a8ab0b705ec38db99c7973b95d6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Mar 2025 10:34:36 -0400 Subject: [PATCH 50/68] drm/amdgpu/pm: wire up hwmon fan speed for smu 14.0.2 Add callbacks for fan speed fetching. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4034 Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 5cad09c5f2ff..e4089fd58711 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1627,6 +1627,39 @@ static void smu_v14_0_2_get_unique_id(struct smu_context *smu) adev->unique_id = ((uint64_t)upper32 << 32) | lower32; } +static int smu_v14_0_2_get_fan_speed_pwm(struct smu_context *smu, + uint32_t *speed) +{ + int ret; + + if (!speed) + return -EINVAL; + + ret = smu_v14_0_2_get_smu_metrics_data(smu, + METRICS_CURR_FANPWM, + speed); + if (ret) { + dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!"); + return ret; + } + + /* Convert the PMFW output which is in percent to pwm(255) based */ + *speed = min(*speed * 255 / 100, (uint32_t)255); + + return 0; +} + +static int smu_v14_0_2_get_fan_speed_rpm(struct smu_context *smu, + uint32_t *speed) +{ + if (!speed) + return -EINVAL; + + return smu_v14_0_2_get_smu_metrics_data(smu, + METRICS_CURR_FANSPEED, + speed); +} + static int smu_v14_0_2_get_power_limit(struct smu_context *smu, uint32_t *current_power_limit, uint32_t *default_power_limit, @@ -2804,6 +2837,8 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .set_performance_level = smu_v14_0_set_performance_level, .gfx_off_control = smu_v14_0_gfx_off_control, .get_unique_id = smu_v14_0_2_get_unique_id, + .get_fan_speed_pwm = smu_v14_0_2_get_fan_speed_pwm, + .get_fan_speed_rpm = smu_v14_0_2_get_fan_speed_rpm, .get_power_limit = smu_v14_0_2_get_power_limit, .set_power_limit = smu_v14_0_2_set_power_limit, .get_power_profile_mode = smu_v14_0_2_get_power_profile_mode, From 2bc016737a115469eb19a9a2d5d8d60ed38f6f82 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Mar 2025 16:30:58 -0400 Subject: [PATCH 51/68] drm/amdgpu/pm: add VCN activity for renoir Wire up the query. Reviewed-by: Lijo Lazar Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 37d82a71a2d7..9481f897432d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -1285,6 +1285,12 @@ static int renoir_read_sensor(struct smu_context *smu, (uint32_t *)data); *size = 4; break; + case AMDGPU_PP_SENSOR_VCN_LOAD: + ret = renoir_get_smu_metrics_data(smu, + METRICS_AVERAGE_VCNACTIVITY, + (uint32_t *)data); + *size = 4; + break; case AMDGPU_PP_SENSOR_EDGE_TEMP: ret = renoir_get_smu_metrics_data(smu, METRICS_TEMPERATURE_EDGE, From 357506799bacb21ba1ed7d64e02ac9c4a1f45b31 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 6 Feb 2025 17:40:42 +0530 Subject: [PATCH 52/68] drm/amdgpu: Calculate IP specific xgmi bandwidth Use IP version specific xgmi speed/width for bandwidth calculation. Signed-off-by: Lijo Lazar Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9e9fec5b52de..17ce1677378e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2757,6 +2757,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!total) return -ENODEV; + if (adev->gmc.xgmi.supported) + amdgpu_xgmi_early_init(adev); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); if (ip_block->status.valid != false) amdgpu_amdkfd_device_probe(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 7fdf30f1161c..6029a799074d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -844,7 +844,9 @@ int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device * { bool peer_mode = bw_mode == AMDGPU_XGMI_BW_MODE_PER_PEER; int unit_scale = bw_unit == AMDGPU_XGMI_BW_UNIT_MBYTES ? 1000 : 1; - int speed = 25, num_lanes = 16, num_links = !peer_mode ? 1 : -1; + int num_lanes = adev->gmc.xgmi.max_width; + int speed = adev->gmc.xgmi.max_speed; + int num_links = !peer_mode ? 1 : -1; if (!(min_bw && max_bw)) return -EINVAL; From 6a87982b5897c387f5508fddfe2404726045517c Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 11 Mar 2025 11:10:17 -0600 Subject: [PATCH 53/68] drm/amd/display: Remove incorrect macro guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This macro guard "__cplusplus" is unnecessary and should not be there. Signed-off-by: Alex Hung Reviewed-by: Rodrigo Siqueira Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h index 145961803a92..d621c42a237e 100644 --- a/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h +++ b/drivers/gpu/drm/amd/display/dc/sspl/dc_spl.h @@ -17,9 +17,6 @@ #define SPL_EXPAND(a, b) SPL_EXPAND2(a, b) #define SPL_NAMESPACE(symbol) SPL_EXPAND(SPL_PFX_, symbol) -#ifdef __cplusplus -extern "C" { -#endif /* SPL interfaces */ From 2393c1a907c2e79cfb4a1fd6ecee9dc7110ad3d1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Mar 2025 15:00:12 -0400 Subject: [PATCH 54/68] drm/amdgpu/pm: add VCN activity for SMU 13.0.0/7 Wire up the query. Reviewed-by: Lijo Lazar Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 10 ++++++++++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 898487ad6cd2..5a9711e8cf68 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -836,6 +836,10 @@ static int smu_v13_0_0_get_smu_metrics_data(struct smu_context *smu, case METRICS_AVERAGE_MEMACTIVITY: *value = metrics->AverageUclkActivity; break; + case METRICS_AVERAGE_VCNACTIVITY: + *value = max(metrics->Vcn0ActivityPercentage, + metrics->Vcn1ActivityPercentage); + break; case METRICS_AVERAGE_SOCKETPOWER: *value = metrics->AverageSocketPower << 8; break; @@ -962,6 +966,12 @@ static int smu_v13_0_0_read_sensor(struct smu_context *smu, (uint32_t *)data); *size = 4; break; + case AMDGPU_PP_SENSOR_VCN_LOAD: + ret = smu_v13_0_0_get_smu_metrics_data(smu, + METRICS_AVERAGE_VCNACTIVITY, + (uint32_t *)data); + *size = 4; + break; case AMDGPU_PP_SENSOR_GPU_AVG_POWER: ret = smu_v13_0_0_get_smu_metrics_data(smu, METRICS_AVERAGE_SOCKETPOWER, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 19f47811f6db..c8f4f6fb4083 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -807,6 +807,10 @@ static int smu_v13_0_7_get_smu_metrics_data(struct smu_context *smu, else *value = metrics->AverageMemclkFrequencyPreDs; break; + case METRICS_AVERAGE_VCNACTIVITY: + *value = max(metrics->Vcn0ActivityPercentage, + metrics->Vcn1ActivityPercentage); + break; case METRICS_AVERAGE_VCLK: *value = metrics->AverageVclk0Frequency; break; @@ -951,6 +955,12 @@ static int smu_v13_0_7_read_sensor(struct smu_context *smu, (uint32_t *)data); *size = 4; break; + case AMDGPU_PP_SENSOR_VCN_LOAD: + ret = smu_v13_0_7_get_smu_metrics_data(smu, + METRICS_AVERAGE_VCNACTIVITY, + (uint32_t *)data); + *size = 4; + break; case AMDGPU_PP_SENSOR_GPU_AVG_POWER: ret = smu_v13_0_7_get_smu_metrics_data(smu, METRICS_AVERAGE_SOCKETPOWER, From 1b81674e0baf6661b12bbd0beaf68d9ed1afb04c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Mar 2025 14:48:22 -0400 Subject: [PATCH 55/68] drm/amdgpu/pm: add VCN activity for SMU 14.0.2 Wire up the query. Reviewed-by: Lijo Lazar Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index e4089fd58711..21d1b34ada05 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -756,6 +756,10 @@ static int smu_v14_0_2_get_smu_metrics_data(struct smu_context *smu, case METRICS_AVERAGE_MEMACTIVITY: *value = metrics->AverageUclkActivity; break; + case METRICS_AVERAGE_VCNACTIVITY: + *value = max(metrics->AverageVcn0ActivityPercentage, + metrics->Vcn1ActivityPercentage); + break; case METRICS_AVERAGE_SOCKETPOWER: *value = metrics->AverageSocketPower << 8; break; @@ -882,6 +886,12 @@ static int smu_v14_0_2_read_sensor(struct smu_context *smu, (uint32_t *)data); *size = 4; break; + case AMDGPU_PP_SENSOR_VCN_LOAD: + ret = smu_v14_0_2_get_smu_metrics_data(smu, + METRICS_AVERAGE_VCNACTIVITY, + (uint32_t *)data); + *size = 4; + break; case AMDGPU_PP_SENSOR_GPU_AVG_POWER: ret = smu_v14_0_2_get_smu_metrics_data(smu, METRICS_AVERAGE_SOCKETPOWER, From 18537feb182d8506d4ee2778e11aad7c60e1453f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Mar 2025 15:37:30 -0400 Subject: [PATCH 56/68] drm/amdgpu/pm: enable vcn busy sysfs for additional GC 11.x Make it visible for the all GC 11.x chips that support it. Reviewed-by: Lijo Lazar Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 2179344e78d9..c1f5ef7384b4 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2315,12 +2315,18 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(vcn_busy_percent)) { if (!(gc_ver == IP_VERSION(10, 3, 1) || - gc_ver == IP_VERSION(10, 3, 3) || - gc_ver == IP_VERSION(10, 3, 6) || - gc_ver == IP_VERSION(10, 3, 7) || - gc_ver == IP_VERSION(11, 0, 1) || - gc_ver == IP_VERSION(11, 0, 4) || - gc_ver == IP_VERSION(11, 5, 0))) + gc_ver == IP_VERSION(10, 3, 3) || + gc_ver == IP_VERSION(10, 3, 6) || + gc_ver == IP_VERSION(10, 3, 7) || + gc_ver == IP_VERSION(11, 0, 0) || + gc_ver == IP_VERSION(11, 0, 1) || + gc_ver == IP_VERSION(11, 0, 2) || + gc_ver == IP_VERSION(11, 0, 3) || + gc_ver == IP_VERSION(11, 0, 4) || + gc_ver == IP_VERSION(11, 5, 0) || + gc_ver == IP_VERSION(11, 5, 1) || + gc_ver == IP_VERSION(11, 5, 2) || + gc_ver == IP_VERSION(11, 5, 3))) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pcie_bw)) { /* PCIe Perf counters won't work on APU nodes */ From 7e0459d453b911435673edd7a86eadc600c63238 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Fri, 7 Feb 2025 16:40:34 -0500 Subject: [PATCH 57/68] drm/amdkfd: Fix instruction hazard in gfx12 trap handler VALU instructions with SGPR source need wait states to avoid hazard with SALU using different SGPR. v2: Eliminate some hazards to reduce code explosion Signed-off-by: Jay Cornwall Reviewed-by: Lancelot Six Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 677 ++++++++++-------- .../amd/amdkfd/cwsr_trap_handler_gfx12.asm | 82 ++- 2 files changed, 404 insertions(+), 355 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 651660958e5b..0320163b6e74 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -3644,7 +3644,7 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = { }; static const uint32_t cwsr_trap_gfx12_hex[] = { - 0xbfa00001, 0xbfa0024b, + 0xbfa00001, 0xbfa002a2, 0xb0804009, 0xb8f8f804, 0x9178ff78, 0x00008c00, 0xb8fbf811, 0x8b6eff78, @@ -3718,7 +3718,15 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0x00011677, 0xd7610000, 0x00011a79, 0xd7610000, 0x00011c7e, 0xd7610000, - 0x00011e7f, 0xbefe00ff, + 0x00011e7f, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xbefe00ff, 0x00003fff, 0xbeff0080, 0xee0a407a, 0x000c0000, 0x00004000, 0xd760007a, @@ -3755,38 +3763,46 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0x00000200, 0xbef600ff, 0x01000000, 0x7e000280, 0x7e020280, 0x7e040280, - 0xbefd0080, 0xbe804ec2, - 0xbf94fffe, 0xb8faf804, - 0x8b7a847a, 0x91788478, - 0x8c787a78, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xd7610002, 0x0000fa6c, - 0x807d817d, 0x917aff6d, - 0x80000000, 0xd7610002, - 0x0000fa7a, 0x807d817d, - 0xd7610002, 0x0000fa6e, - 0x807d817d, 0xd7610002, - 0x0000fa6f, 0x807d817d, - 0xd7610002, 0x0000fa78, - 0x807d817d, 0xb8faf811, - 0xd7610002, 0x0000fa7a, - 0x807d817d, 0xd7610002, - 0x0000fa7b, 0x807d817d, - 0xb8f1f801, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f814, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f815, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f812, 0xd7610002, - 0x0000fa71, 0x807d817d, - 0xb8f1f813, 0xd7610002, - 0x0000fa71, 0x807d817d, + 0xbe804ec2, 0xbf94fffe, + 0xb8faf804, 0x8b7a847a, + 0x91788478, 0x8c787a78, + 0x917aff6d, 0x80000000, + 0xd7610002, 0x00010071, + 0xd7610002, 0x0001026c, + 0xd7610002, 0x0001047a, + 0xd7610002, 0x0001066e, + 0xd7610002, 0x0001086f, + 0xd7610002, 0x00010a78, + 0xd7610002, 0x00010e7b, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xd8500000, 0x00000000, + 0xb8faf811, 0xd7610002, + 0x00010c7a, 0xb8faf801, + 0xd7610002, 0x0001107a, + 0xb8faf814, 0xd7610002, + 0x0001127a, 0xb8faf815, + 0xd7610002, 0x0001147a, + 0xb8faf812, 0xd7610002, + 0x0001167a, 0xb8faf813, + 0xd7610002, 0x0001187a, 0xb8faf802, 0xd7610002, - 0x0000fa7a, 0x807d817d, - 0xbefa50c1, 0xbfc70000, - 0xd7610002, 0x0000fa7a, - 0x807d817d, 0xbefe00ff, + 0x00011a7a, 0xbefa50c1, + 0xbfc70000, 0xd7610002, + 0x00011c7a, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xbefe00ff, 0x0000ffff, 0xbeff0080, 0xc4068070, 0x008ce802, 0x00000000, 0xbefe00c1, @@ -3801,331 +3817,358 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0xbe824102, 0xbe844104, 0xbe864106, 0xbe884108, 0xbe8a410a, 0xbe8c410c, - 0xbe8e410e, 0xd7610002, - 0x0000f200, 0x80798179, - 0xd7610002, 0x0000f201, - 0x80798179, 0xd7610002, - 0x0000f202, 0x80798179, - 0xd7610002, 0x0000f203, - 0x80798179, 0xd7610002, - 0x0000f204, 0x80798179, - 0xd7610002, 0x0000f205, - 0x80798179, 0xd7610002, - 0x0000f206, 0x80798179, - 0xd7610002, 0x0000f207, - 0x80798179, 0xd7610002, - 0x0000f208, 0x80798179, - 0xd7610002, 0x0000f209, - 0x80798179, 0xd7610002, - 0x0000f20a, 0x80798179, - 0xd7610002, 0x0000f20b, - 0x80798179, 0xd7610002, - 0x0000f20c, 0x80798179, - 0xd7610002, 0x0000f20d, - 0x80798179, 0xd7610002, - 0x0000f20e, 0x80798179, - 0xd7610002, 0x0000f20f, - 0x80798179, 0xbf06a079, - 0xbfa10007, 0xc4068070, + 0xbe8e410e, 0xbf068079, + 0xbfa10032, 0xd7610002, + 0x00010000, 0xd7610002, + 0x00010201, 0xd7610002, + 0x00010402, 0xd7610002, + 0x00010603, 0xd7610002, + 0x00010804, 0xd7610002, + 0x00010a05, 0xd7610002, + 0x00010c06, 0xd7610002, + 0x00010e07, 0xd7610002, + 0x00011008, 0xd7610002, + 0x00011209, 0xd7610002, + 0x0001140a, 0xd7610002, + 0x0001160b, 0xd7610002, + 0x0001180c, 0xd7610002, + 0x00011a0d, 0xd7610002, + 0x00011c0e, 0xd7610002, + 0x00011e0f, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0x80799079, + 0xbfa00038, 0xd7610002, + 0x00012000, 0xd7610002, + 0x00012201, 0xd7610002, + 0x00012402, 0xd7610002, + 0x00012603, 0xd7610002, + 0x00012804, 0xd7610002, + 0x00012a05, 0xd7610002, + 0x00012c06, 0xd7610002, + 0x00012e07, 0xd7610002, + 0x00013008, 0xd7610002, + 0x00013209, 0xd7610002, + 0x0001340a, 0xd7610002, + 0x0001360b, 0xd7610002, + 0x0001380c, 0xd7610002, + 0x00013a0d, 0xd7610002, + 0x00013c0e, 0xd7610002, + 0x00013e0f, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0x80799079, + 0xc4068070, 0x008ce802, + 0x00000000, 0x8070ff70, + 0x00000080, 0xbef90080, + 0x7e040280, 0x807d907d, + 0xbf0aff7d, 0x00000060, + 0xbfa2ff88, 0xbe804100, + 0xbe824102, 0xbe844104, + 0xbe864106, 0xbe884108, + 0xbe8a410a, 0xd7610002, + 0x00010000, 0xd7610002, + 0x00010201, 0xd7610002, + 0x00010402, 0xd7610002, + 0x00010603, 0xd7610002, + 0x00010804, 0xd7610002, + 0x00010a05, 0xd7610002, + 0x00010c06, 0xd7610002, + 0x00010e07, 0xd7610002, + 0x00011008, 0xd7610002, + 0x00011209, 0xd7610002, + 0x0001140a, 0xd7610002, + 0x0001160b, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xd8500000, + 0x00000000, 0xc4068070, 0x008ce802, 0x00000000, + 0xbefe00c1, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8fb4306, 0x8b7bc17b, + 0xbfa10044, 0x8b7aff6d, + 0x80000000, 0xbfa10041, + 0x847b897b, 0xbef6007b, + 0xb8f03b05, 0x80708170, + 0xbf0d9973, 0xbfa20002, + 0x84708970, 0xbfa00001, + 0x84708a70, 0xb8fa1e06, + 0x847a8a7a, 0x80707a70, + 0x8070ff70, 0x00000200, 0x8070ff70, 0x00000080, - 0xbef90080, 0x7e040280, - 0x807d907d, 0xbf0aff7d, - 0x00000060, 0xbfa2ffbb, - 0xbe804100, 0xbe824102, - 0xbe844104, 0xbe864106, - 0xbe884108, 0xbe8a410a, - 0xd7610002, 0x0000f200, - 0x80798179, 0xd7610002, - 0x0000f201, 0x80798179, - 0xd7610002, 0x0000f202, - 0x80798179, 0xd7610002, - 0x0000f203, 0x80798179, - 0xd7610002, 0x0000f204, - 0x80798179, 0xd7610002, - 0x0000f205, 0x80798179, - 0xd7610002, 0x0000f206, - 0x80798179, 0xd7610002, - 0x0000f207, 0x80798179, - 0xd7610002, 0x0000f208, - 0x80798179, 0xd7610002, - 0x0000f209, 0x80798179, - 0xd7610002, 0x0000f20a, - 0x80798179, 0xd7610002, - 0x0000f20b, 0x80798179, - 0xc4068070, 0x008ce802, - 0x00000000, 0xbefe00c1, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8fb4306, - 0x8b7bc17b, 0xbfa10044, - 0x8b7aff6d, 0x80000000, - 0xbfa10041, 0x847b897b, - 0xbef6007b, 0xb8f03b05, - 0x80708170, 0xbf0d9973, - 0xbfa20002, 0x84708970, - 0xbfa00001, 0x84708a70, - 0xb8fa1e06, 0x847a8a7a, - 0x80707a70, 0x8070ff70, - 0x00000200, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xd71f0000, - 0x000100c1, 0xd7200000, - 0x000200c1, 0x16000084, - 0x857d9973, 0x8b7d817d, - 0xbf06817d, 0xbefd0080, - 0xbfa20013, 0xbe8300ff, - 0x00000080, 0xbf800000, - 0xbf800000, 0xbf800000, - 0xd8d80000, 0x01000000, - 0xbf8a0000, 0xc4068070, - 0x008ce801, 0x00000000, - 0x807d037d, 0x80700370, - 0xd5250000, 0x0001ff00, - 0x00000080, 0xbf0a7b7d, - 0xbfa2fff3, 0xbfa00012, - 0xbe8300ff, 0x00000100, + 0xbef600ff, 0x01000000, + 0xd71f0000, 0x000100c1, + 0xd7200000, 0x000200c1, + 0x16000084, 0x857d9973, + 0x8b7d817d, 0xbf06817d, + 0xbefd0080, 0xbfa20013, + 0xbe8300ff, 0x00000080, 0xbf800000, 0xbf800000, 0xbf800000, 0xd8d80000, 0x01000000, 0xbf8a0000, 0xc4068070, 0x008ce801, 0x00000000, 0x807d037d, 0x80700370, 0xd5250000, - 0x0001ff00, 0x00000100, + 0x0001ff00, 0x00000080, 0xbf0a7b7d, 0xbfa2fff3, - 0xbefe00c1, 0x857d9973, - 0x8b7d817d, 0xbf06817d, - 0xbfa20004, 0xbef000ff, - 0x00000200, 0xbeff0080, - 0xbfa00003, 0xbef000ff, - 0x00000400, 0xbeff00c1, - 0xb8fb3b05, 0x807b817b, - 0x847b827b, 0x857d9973, - 0x8b7d817d, 0xbf06817d, - 0xbfa2001b, 0xbef600ff, - 0x01000000, 0xbefd0084, - 0xbf0a7b7d, 0xbfa10040, - 0x7e008700, 0x7e028701, - 0x7e048702, 0x7e068703, - 0xc4068070, 0x008ce800, - 0x00000000, 0xc4068070, - 0x008ce801, 0x00008000, - 0xc4068070, 0x008ce802, - 0x00010000, 0xc4068070, - 0x008ce803, 0x00018000, - 0x807d847d, 0x8070ff70, - 0x00000200, 0xbf0a7b7d, - 0xbfa2ffeb, 0xbfa0002a, + 0xbfa00012, 0xbe8300ff, + 0x00000100, 0xbf800000, + 0xbf800000, 0xbf800000, + 0xd8d80000, 0x01000000, + 0xbf8a0000, 0xc4068070, + 0x008ce801, 0x00000000, + 0x807d037d, 0x80700370, + 0xd5250000, 0x0001ff00, + 0x00000100, 0xbf0a7b7d, + 0xbfa2fff3, 0xbefe00c1, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa20004, + 0xbef000ff, 0x00000200, + 0xbeff0080, 0xbfa00003, + 0xbef000ff, 0x00000400, + 0xbeff00c1, 0xb8fb3b05, + 0x807b817b, 0x847b827b, + 0x857d9973, 0x8b7d817d, + 0xbf06817d, 0xbfa2001b, 0xbef600ff, 0x01000000, 0xbefd0084, 0xbf0a7b7d, - 0xbfa10015, 0x7e008700, + 0xbfa10040, 0x7e008700, 0x7e028701, 0x7e048702, 0x7e068703, 0xc4068070, 0x008ce800, 0x00000000, 0xc4068070, 0x008ce801, - 0x00010000, 0xc4068070, - 0x008ce802, 0x00020000, + 0x00008000, 0xc4068070, + 0x008ce802, 0x00010000, 0xc4068070, 0x008ce803, - 0x00030000, 0x807d847d, - 0x8070ff70, 0x00000400, + 0x00018000, 0x807d847d, + 0x8070ff70, 0x00000200, 0xbf0a7b7d, 0xbfa2ffeb, - 0xb8fb1e06, 0x8b7bc17b, - 0xbfa1000d, 0x847b837b, - 0x807b7d7b, 0xbefe00c1, - 0xbeff0080, 0x7e008700, + 0xbfa0002a, 0xbef600ff, + 0x01000000, 0xbefd0084, + 0xbf0a7b7d, 0xbfa10015, + 0x7e008700, 0x7e028701, + 0x7e048702, 0x7e068703, 0xc4068070, 0x008ce800, - 0x00000000, 0x807d817d, - 0x8070ff70, 0x00000080, - 0xbf0a7b7d, 0xbfa2fff7, - 0xbfa0016e, 0xbef4007e, - 0x8b75ff7f, 0x0000ffff, - 0x8c75ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x10807fac, 0xbef1007f, - 0xb8f20742, 0x84729972, - 0x8b6eff7f, 0x04000000, - 0xbfa1003b, 0xbefe00c1, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8ef4306, - 0x8b6fc16f, 0xbfa10030, - 0x846f896f, 0xbef6006f, + 0x00000000, 0xc4068070, + 0x008ce801, 0x00010000, + 0xc4068070, 0x008ce802, + 0x00020000, 0xc4068070, + 0x008ce803, 0x00030000, + 0x807d847d, 0x8070ff70, + 0x00000400, 0xbf0a7b7d, + 0xbfa2ffeb, 0xb8fb1e06, + 0x8b7bc17b, 0xbfa1000d, + 0x847b837b, 0x807b7d7b, + 0xbefe00c1, 0xbeff0080, + 0x7e008700, 0xc4068070, + 0x008ce800, 0x00000000, + 0x807d817d, 0x8070ff70, + 0x00000080, 0xbf0a7b7d, + 0xbfa2fff7, 0xbfa0016e, + 0xbef4007e, 0x8b75ff7f, + 0x0000ffff, 0x8c75ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x10807fac, + 0xbef1007f, 0xb8f20742, + 0x84729972, 0x8b6eff7f, + 0x04000000, 0xbfa1003b, + 0xbefe00c1, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8ef4306, 0x8b6fc16f, + 0xbfa10030, 0x846f896f, + 0xbef6006f, 0xb8f83b05, + 0x80788178, 0xbf0d9972, + 0xbfa20002, 0x84788978, + 0xbfa00001, 0x84788a78, + 0xb8ee1e06, 0x846e8a6e, + 0x80786e78, 0x8078ff78, + 0x00000200, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbefd0080, 0xbfa2000d, + 0xc4050078, 0x0080e800, + 0x00000000, 0xbf8a0000, + 0xdac00000, 0x00000000, + 0x807dff7d, 0x00000080, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff4, + 0xbfa0000c, 0xc4050078, + 0x0080e800, 0x00000000, + 0xbf8a0000, 0xdac00000, + 0x00000000, 0x807dff7d, + 0x00000100, 0x8078ff78, + 0x00000100, 0xbf0a6f7d, + 0xbfa2fff4, 0xbef80080, + 0xbefe00c1, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbfa20002, 0xbeff0080, + 0xbfa00001, 0xbeff00c1, + 0xb8ef3b05, 0x806f816f, + 0x846f826f, 0x857d9972, + 0x8b7d817d, 0xbf06817d, + 0xbfa2002c, 0xbef600ff, + 0x01000000, 0xbeee0078, + 0x8078ff78, 0x00000200, + 0xbefd0084, 0xbf0a6f7d, + 0xbfa10061, 0xc4050078, + 0x008ce800, 0x00000000, + 0xc4050078, 0x008ce801, + 0x00008000, 0xc4050078, + 0x008ce802, 0x00010000, + 0xc4050078, 0x008ce803, + 0x00018000, 0xbf8a0000, + 0x7e008500, 0x7e028501, + 0x7e048502, 0x7e068503, + 0x807d847d, 0x8078ff78, + 0x00000200, 0xbf0a6f7d, + 0xbfa2ffea, 0xc405006e, + 0x008ce800, 0x00000000, + 0xc405006e, 0x008ce801, + 0x00008000, 0xc405006e, + 0x008ce802, 0x00010000, + 0xc405006e, 0x008ce803, + 0x00018000, 0xbf8a0000, + 0xbfa0003d, 0xbef600ff, + 0x01000000, 0xbeee0078, + 0x8078ff78, 0x00000400, + 0xbefd0084, 0xbf0a6f7d, + 0xbfa10016, 0xc4050078, + 0x008ce800, 0x00000000, + 0xc4050078, 0x008ce801, + 0x00010000, 0xc4050078, + 0x008ce802, 0x00020000, + 0xc4050078, 0x008ce803, + 0x00030000, 0xbf8a0000, + 0x7e008500, 0x7e028501, + 0x7e048502, 0x7e068503, + 0x807d847d, 0x8078ff78, + 0x00000400, 0xbf0a6f7d, + 0xbfa2ffea, 0xb8ef1e06, + 0x8b6fc16f, 0xbfa1000f, + 0x846f836f, 0x806f7d6f, + 0xbefe00c1, 0xbeff0080, + 0xc4050078, 0x008ce800, + 0x00000000, 0xbf8a0000, + 0x7e008500, 0x807d817d, + 0x8078ff78, 0x00000080, + 0xbf0a6f7d, 0xbfa2fff6, + 0xbeff00c1, 0xc405006e, + 0x008ce800, 0x00000000, + 0xc405006e, 0x008ce801, + 0x00010000, 0xc405006e, + 0x008ce802, 0x00020000, + 0xc405006e, 0x008ce803, + 0x00030000, 0xbf8a0000, 0xb8f83b05, 0x80788178, 0xbf0d9972, 0xbfa20002, 0x84788978, 0xbfa00001, 0x84788a78, 0xb8ee1e06, 0x846e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0x8078ff78, 0x00000080, - 0xbef600ff, 0x01000000, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbefd0080, - 0xbfa2000d, 0xc4050078, - 0x0080e800, 0x00000000, - 0xbf8a0000, 0xdac00000, - 0x00000000, 0x807dff7d, - 0x00000080, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff4, 0xbfa0000c, - 0xc4050078, 0x0080e800, - 0x00000000, 0xbf8a0000, - 0xdac00000, 0x00000000, - 0x807dff7d, 0x00000100, - 0x8078ff78, 0x00000100, - 0xbf0a6f7d, 0xbfa2fff4, - 0xbef80080, 0xbefe00c1, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa20002, - 0xbeff0080, 0xbfa00001, - 0xbeff00c1, 0xb8ef3b05, - 0x806f816f, 0x846f826f, - 0x857d9972, 0x8b7d817d, - 0xbf06817d, 0xbfa2002c, + 0x80f8ff78, 0x00000050, 0xbef600ff, 0x01000000, - 0xbeee0078, 0x8078ff78, - 0x00000200, 0xbefd0084, - 0xbf0a6f7d, 0xbfa10061, - 0xc4050078, 0x008ce800, - 0x00000000, 0xc4050078, - 0x008ce801, 0x00008000, - 0xc4050078, 0x008ce802, - 0x00010000, 0xc4050078, - 0x008ce803, 0x00018000, - 0xbf8a0000, 0x7e008500, - 0x7e028501, 0x7e048502, - 0x7e068503, 0x807d847d, + 0xbefd00ff, 0x0000006c, + 0x80f89078, 0xf462403a, + 0xf0000000, 0xbf8a0000, + 0x80fd847d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0x80f8a078, 0xf462603a, + 0xf0000000, 0xbf8a0000, + 0x80fd887d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0x80f8c078, 0xf462803a, + 0xf0000000, 0xbf8a0000, + 0x80fd907d, 0xbf800000, + 0xbe804300, 0xbe824302, + 0xbe844304, 0xbe864306, + 0xbe884308, 0xbe8a430a, + 0xbe8c430c, 0xbe8e430e, + 0xbf06807d, 0xbfa1fff0, + 0xb980f801, 0x00000000, + 0xb8f83b05, 0x80788178, + 0xbf0d9972, 0xbfa20002, + 0x84788978, 0xbfa00001, + 0x84788a78, 0xb8ee1e06, + 0x846e8a6e, 0x80786e78, 0x8078ff78, 0x00000200, - 0xbf0a6f7d, 0xbfa2ffea, - 0xc405006e, 0x008ce800, - 0x00000000, 0xc405006e, - 0x008ce801, 0x00008000, - 0xc405006e, 0x008ce802, - 0x00010000, 0xc405006e, - 0x008ce803, 0x00018000, - 0xbf8a0000, 0xbfa0003d, 0xbef600ff, 0x01000000, - 0xbeee0078, 0x8078ff78, - 0x00000400, 0xbefd0084, - 0xbf0a6f7d, 0xbfa10016, - 0xc4050078, 0x008ce800, - 0x00000000, 0xc4050078, - 0x008ce801, 0x00010000, - 0xc4050078, 0x008ce802, - 0x00020000, 0xc4050078, - 0x008ce803, 0x00030000, - 0xbf8a0000, 0x7e008500, - 0x7e028501, 0x7e048502, - 0x7e068503, 0x807d847d, - 0x8078ff78, 0x00000400, - 0xbf0a6f7d, 0xbfa2ffea, - 0xb8ef1e06, 0x8b6fc16f, - 0xbfa1000f, 0x846f836f, - 0x806f7d6f, 0xbefe00c1, - 0xbeff0080, 0xc4050078, - 0x008ce800, 0x00000000, - 0xbf8a0000, 0x7e008500, - 0x807d817d, 0x8078ff78, - 0x00000080, 0xbf0a6f7d, - 0xbfa2fff6, 0xbeff00c1, - 0xc405006e, 0x008ce800, - 0x00000000, 0xc405006e, - 0x008ce801, 0x00010000, - 0xc405006e, 0x008ce802, - 0x00020000, 0xc405006e, - 0x008ce803, 0x00030000, - 0xbf8a0000, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0x80f8ff78, - 0x00000050, 0xbef600ff, - 0x01000000, 0xbefd00ff, - 0x0000006c, 0x80f89078, - 0xf462403a, 0xf0000000, - 0xbf8a0000, 0x80fd847d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0x80f8a078, - 0xf462603a, 0xf0000000, - 0xbf8a0000, 0x80fd887d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0x80f8c078, - 0xf462803a, 0xf0000000, - 0xbf8a0000, 0x80fd907d, - 0xbf800000, 0xbe804300, - 0xbe824302, 0xbe844304, - 0xbe864306, 0xbe884308, - 0xbe8a430a, 0xbe8c430c, - 0xbe8e430e, 0xbf06807d, - 0xbfa1fff0, 0xb980f801, - 0x00000000, 0xb8f83b05, - 0x80788178, 0xbf0d9972, - 0xbfa20002, 0x84788978, - 0xbfa00001, 0x84788a78, - 0xb8ee1e06, 0x846e8a6e, - 0x80786e78, 0x8078ff78, - 0x00000200, 0xbef600ff, - 0x01000000, 0xbeff0071, - 0xf4621bfa, 0xf0000000, - 0x80788478, 0xf4621b3a, + 0xbeff0071, 0xf4621bfa, 0xf0000000, 0x80788478, - 0xf4621b7a, 0xf0000000, - 0x80788478, 0xf4621c3a, + 0xf4621b3a, 0xf0000000, + 0x80788478, 0xf4621b7a, 0xf0000000, 0x80788478, - 0xf4621c7a, 0xf0000000, - 0x80788478, 0xf4621eba, + 0xf4621c3a, 0xf0000000, + 0x80788478, 0xf4621c7a, 0xf0000000, 0x80788478, - 0xf4621efa, 0xf0000000, - 0x80788478, 0xf4621e7a, + 0xf4621eba, 0xf0000000, + 0x80788478, 0xf4621efa, 0xf0000000, 0x80788478, - 0xf4621cfa, 0xf0000000, - 0x80788478, 0xf4621bba, + 0xf4621e7a, 0xf0000000, + 0x80788478, 0xf4621cfa, 0xf0000000, 0x80788478, - 0xbf8a0000, 0xb96ef814, 0xf4621bba, 0xf0000000, 0x80788478, 0xbf8a0000, - 0xb96ef815, 0xf4621bba, + 0xb96ef814, 0xf4621bba, 0xf0000000, 0x80788478, - 0xbf8a0000, 0xb96ef812, + 0xbf8a0000, 0xb96ef815, 0xf4621bba, 0xf0000000, 0x80788478, 0xbf8a0000, - 0xb96ef813, 0x8b6eff7f, - 0x04000000, 0xbfa1000d, - 0x80788478, 0xf4621bba, + 0xb96ef812, 0xf4621bba, 0xf0000000, 0x80788478, - 0xbf8a0000, 0xbf0d806e, - 0xbfa10006, 0x856e906e, - 0x8b6e6e6e, 0xbfa10003, - 0xbe804ec1, 0x816ec16e, - 0xbfa0fffb, 0xbefd006f, - 0xbefe0070, 0xbeff0071, - 0xb97b2011, 0x857b867b, - 0xb97b0191, 0x857b827b, - 0xb97bba11, 0xb973f801, - 0xb8ee3b05, 0x806e816e, - 0xbf0d9972, 0xbfa20002, - 0x846e896e, 0xbfa00001, - 0x846e8a6e, 0xb8ef1e06, - 0x846f8a6f, 0x806e6f6e, - 0x806eff6e, 0x00000200, - 0x806e746e, 0x826f8075, - 0x8b6fff6f, 0x0000ffff, - 0xf4605c37, 0xf8000050, - 0xf4605d37, 0xf8000060, - 0xf4601e77, 0xf8000074, - 0xbf8a0000, 0x8b6dff6d, - 0x0000ffff, 0x8bfe7e7e, - 0x8bea6a6a, 0xb97af804, + 0xbf8a0000, 0xb96ef813, + 0x8b6eff7f, 0x04000000, + 0xbfa1000d, 0x80788478, + 0xf4621bba, 0xf0000000, + 0x80788478, 0xbf8a0000, + 0xbf0d806e, 0xbfa10006, + 0x856e906e, 0x8b6e6e6e, + 0xbfa10003, 0xbe804ec1, + 0x816ec16e, 0xbfa0fffb, + 0xbefd006f, 0xbefe0070, + 0xbeff0071, 0xb97b2011, + 0x857b867b, 0xb97b0191, + 0x857b827b, 0xb97bba11, + 0xb973f801, 0xb8ee3b05, + 0x806e816e, 0xbf0d9972, + 0xbfa20002, 0x846e896e, + 0xbfa00001, 0x846e8a6e, + 0xb8ef1e06, 0x846f8a6f, + 0x806e6f6e, 0x806eff6e, + 0x00000200, 0x806e746e, + 0x826f8075, 0x8b6fff6f, + 0x0000ffff, 0xf4605c37, + 0xf8000050, 0xf4605d37, + 0xf8000060, 0xf4601e77, + 0xf8000074, 0xbf8a0000, + 0x8b6dff6d, 0x0000ffff, + 0x8bfe7e7e, 0x8bea6a6a, + 0xb97af804, 0xbe804ec2, + 0xbf94fffe, 0xbe804a6c, 0xbe804ec2, 0xbf94fffe, - 0xbe804a6c, 0xbe804ec2, - 0xbf94fffe, 0xbfb10000, + 0xbfb10000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, - 0xbf9f0000, 0x00000000, }; static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm index 7b9d36e5fa43..5a1a1b1f897f 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm @@ -30,6 +30,7 @@ #define CHIP_GFX12 37 #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised +#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12) var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4 var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9 @@ -351,6 +352,7 @@ L_HAVE_VGPRS: v_writelane_b32 v0, ttmp13, 0xD v_writelane_b32 v0, exec_lo, 0xE v_writelane_b32 v0, exec_hi, 0xF + valu_sgpr_hazard() s_mov_b32 exec_lo, 0x3FFF s_mov_b32 exec_hi, 0x0 @@ -417,7 +419,6 @@ L_SAVE_HWREG: v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store - s_mov_b32 m0, 0x0 //Next lane of v2 to write to // Ensure no further changes to barrier or LDS state. // STATE_PRIV.BARRIER_COMPLETE may change up to this point. @@ -430,40 +431,41 @@ L_SAVE_HWREG: s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp - write_hwreg_to_v2(s_save_m0) - write_hwreg_to_v2(s_save_pc_lo) s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK - write_hwreg_to_v2(s_save_tmp) - write_hwreg_to_v2(s_save_exec_lo) - write_hwreg_to_v2(s_save_exec_hi) - write_hwreg_to_v2(s_save_state_priv) + v_writelane_b32 v2, s_save_m0, 0x0 + v_writelane_b32 v2, s_save_pc_lo, 0x1 + v_writelane_b32 v2, s_save_tmp, 0x2 + v_writelane_b32 v2, s_save_exec_lo, 0x3 + v_writelane_b32 v2, s_save_exec_hi, 0x4 + v_writelane_b32 v2, s_save_state_priv, 0x5 + v_writelane_b32 v2, s_save_xnack_mask, 0x7 + valu_sgpr_hazard() s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV) - write_hwreg_to_v2(s_save_tmp) + v_writelane_b32 v2, s_save_tmp, 0x6 - write_hwreg_to_v2(s_save_xnack_mask) + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_MODE) + v_writelane_b32 v2, s_save_tmp, 0x8 - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_MODE) - write_hwreg_to_v2(s_save_m0) + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO) + v_writelane_b32 v2, s_save_tmp, 0x9 - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO) - write_hwreg_to_v2(s_save_m0) + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI) + v_writelane_b32 v2, s_save_tmp, 0xA - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI) - write_hwreg_to_v2(s_save_m0) + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) + v_writelane_b32 v2, s_save_tmp, 0xB - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER) - write_hwreg_to_v2(s_save_m0) - - s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL) - write_hwreg_to_v2(s_save_m0) + s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_TRAP_CTRL) + v_writelane_b32 v2, s_save_tmp, 0xC s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS) - write_hwreg_to_v2(s_save_tmp) + v_writelane_b32 v2, s_save_tmp, 0xD s_get_barrier_state s_save_tmp, -1 s_wait_kmcnt (0) - write_hwreg_to_v2(s_save_tmp) + v_writelane_b32 v2, s_save_tmp, 0xE + valu_sgpr_hazard() // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this. s_mov_b32 exec_lo, 0xFFFF @@ -497,10 +499,12 @@ L_SAVE_SGPR_LOOP: s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] - write_16sgpr_to_v2(s0) - - s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled? - s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE + s_cmp_eq_u32 ttmp13, 0x0 + s_cbranch_scc0 L_WRITE_V2_SECOND_HALF + write_16sgpr_to_v2(s0, 0x0) + s_branch L_SAVE_SGPR_SKIP_TCP_STORE +L_WRITE_V2_SECOND_HALF: + write_16sgpr_to_v2(s0, 0x10) buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80 @@ -1056,27 +1060,21 @@ L_END_PGM: s_endpgm_saved end -function write_hwreg_to_v2(s) - // Copy into VGPR for later TCP store. - v_writelane_b32 v2, s, m0 - s_add_u32 m0, m0, 0x1 -end - - -function write_16sgpr_to_v2(s) +function write_16sgpr_to_v2(s, lane_offset) // Copy into VGPR for later TCP store. for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++ - v_writelane_b32 v2, s[sgpr_idx], ttmp13 - s_add_u32 ttmp13, ttmp13, 0x1 + v_writelane_b32 v2, s[sgpr_idx], sgpr_idx + lane_offset end + valu_sgpr_hazard() + s_add_u32 ttmp13, ttmp13, 0x10 end function write_12sgpr_to_v2(s) // Copy into VGPR for later TCP store. for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++ - v_writelane_b32 v2, s[sgpr_idx], ttmp13 - s_add_u32 ttmp13, ttmp13, 0x1 + v_writelane_b32 v2, s[sgpr_idx], sgpr_idx end + valu_sgpr_hazard() end function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) @@ -1128,3 +1126,11 @@ function get_wave_size2(s_reg) s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE) s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE end + +function valu_sgpr_hazard +#if HAVE_VALU_SGPR_HAZARD + for var rep = 0; rep < 8; rep ++ + ds_nop + end +#endif +end From 5b3922222cea2d1e95a72994bd79abd02e0f3f5e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Mar 2025 15:53:55 -0400 Subject: [PATCH 58/68] drm/amdgpu/pm: enable vcn busy sysfs for GC 12.x Make it visible for the all GC 12.x chips that support it. Reviewed-by: Lijo Lazar Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index c1f5ef7384b4..74e75585a43b 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2326,7 +2326,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ gc_ver == IP_VERSION(11, 5, 0) || gc_ver == IP_VERSION(11, 5, 1) || gc_ver == IP_VERSION(11, 5, 2) || - gc_ver == IP_VERSION(11, 5, 3))) + gc_ver == IP_VERSION(11, 5, 3) || + gc_ver == IP_VERSION(12, 0, 0) || + gc_ver == IP_VERSION(12, 0, 1))) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pcie_bw)) { /* PCIe Perf counters won't work on APU nodes */ From 15030aeec3934d32622f73909d54944c8cacf227 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Mar 2025 16:34:45 -0400 Subject: [PATCH 59/68] drm/amdgpu/pm: enable vcn busy sysfs for GC 9.3.0 Make it visible for the all GC 9.3.0 chips that support it. Reviewed-by: Lijo Lazar Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 74e75585a43b..744a72f40dec 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2314,7 +2314,8 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ gc_ver == IP_VERSION(9, 0, 1)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(vcn_busy_percent)) { - if (!(gc_ver == IP_VERSION(10, 3, 1) || + if (!(gc_ver == IP_VERSION(9, 3, 0) || + gc_ver == IP_VERSION(10, 3, 1) || gc_ver == IP_VERSION(10, 3, 3) || gc_ver == IP_VERSION(10, 3, 6) || gc_ver == IP_VERSION(10, 3, 7) || From 8d5e70ba5da21452735474b70322446aeb442c94 Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Thu, 6 Feb 2025 13:09:00 +0800 Subject: [PATCH 60/68] drm/amdgpu: Add amdgpu_sriov_multi_vf_mode function Use amdgpu_sriov_multi_vf_mode to replace amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev). Signed-off-by: Emily Deng Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 ++ drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 8 ++++---- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 49ca8c814455..a1450f13d963 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1990,7 +1990,7 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val) uint32_t max_freq, min_freq; struct amdgpu_device *adev = (struct amdgpu_device *)data; - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + if (amdgpu_sriov_multi_vf_mode(adev)) return -EINVAL; ret = pm_runtime_get_sync(adev_to_drm(adev)->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 9f65487e60f5..df03dba67ab8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -364,6 +364,8 @@ static inline bool is_virtual_machine(void) #define amdgpu_sriov_is_pp_one_vf(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF) +#define amdgpu_sriov_multi_vf_mode(adev) \ + (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) #define amdgpu_sriov_is_debug(adev) \ ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug) #define amdgpu_sriov_is_normal(adev) \ diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 744a72f40dec..922def51685b 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1936,7 +1936,7 @@ static int pp_od_clk_voltage_attr_update(struct amdgpu_device *adev, struct amdg if (gc_ver == IP_VERSION(9, 4, 3) || gc_ver == IP_VERSION(9, 4, 4) || gc_ver == IP_VERSION(9, 5, 0)) { - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + if (amdgpu_sriov_multi_vf_mode(adev)) *states = ATTR_STATE_UNSUPPORTED; return 0; } @@ -1971,7 +1971,7 @@ static int pp_dpm_dcefclk_attr_update(struct amdgpu_device *adev, struct amdgpu_ * setting should not be allowed from VF if not in one VF mode. */ if (gc_ver >= IP_VERSION(10, 0, 0) || - (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))) { + (amdgpu_sriov_multi_vf_mode(adev))) { dev_attr->attr.mode &= ~S_IWUGO; dev_attr->store = NULL; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8cfb07549f54..d1aa6075c4f4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1814,7 +1814,7 @@ static int smu_hw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) { + if (amdgpu_sriov_multi_vf_mode(adev)) { smu->pm_enabled = false; return 0; } @@ -2038,7 +2038,7 @@ static int smu_hw_fini(struct amdgpu_ip_block *ip_block) struct smu_context *smu = adev->powerplay.pp_handle; int i, ret; - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + if (amdgpu_sriov_multi_vf_mode(adev)) return 0; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { @@ -2106,7 +2106,7 @@ static int smu_suspend(struct amdgpu_ip_block *ip_block) int ret; uint64_t count; - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + if (amdgpu_sriov_multi_vf_mode(adev)) return 0; if (!smu->pm_enabled) @@ -2142,7 +2142,7 @@ static int smu_resume(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; - if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev)) + if (amdgpu_sriov_multi_vf_mode(adev)) return 0; if (!smu->pm_enabled) From 2da3af5f0b4deda735898f5c587be4324dda3fbd Mon Sep 17 00:00:00 2001 From: Emily Deng Date: Fri, 7 Feb 2025 14:00:00 +0800 Subject: [PATCH 61/68] drm/amdgpu: set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_MODE to 1 for sriov multiple vf. In sriov multiple vf, Set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_MODE to 1 to read WPTR from MQD. Signed-off-by: Emily Deng Acked-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 27 ++++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 476542b6e7b5..efe45e4edfd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1821,7 +1821,7 @@ static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id) DOORBELL_SOURCE, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_multi_vf_mode(adev)) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_MODE, 1); } else { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 3014925d95ff..80320a6c8854 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -554,7 +554,7 @@ static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd, m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT | 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; - if (amdgpu_sriov_vf(mm->dev->adev)) + if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) m->cp_hqd_pq_doorbell_control |= 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT; m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev); @@ -667,7 +667,9 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd, get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc); init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q); - + if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) + m->cp_hqd_pq_doorbell_control |= 1 << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT; m->cp_mqd_stride_size = offset; /* @@ -727,6 +729,9 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd + size * xcc); update_mqd(mm, m, q, minfo); + if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) + m->cp_hqd_pq_doorbell_control |= 1 << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT; update_cu_mask(mm, m, minfo, xcc); if (q->format == KFD_QUEUE_FORMAT_AQL) { @@ -749,6 +754,21 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, } } +static void restore_mqd_v9_4_3(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *qp, + const void *mqd_src, + const void *ctl_stack_src, u32 ctl_stack_size) +{ + restore_mqd(mm, mqd, mqd_mem_obj, gart_addr, qp, mqd_src, ctl_stack_src, ctl_stack_size); + if (amdgpu_sriov_multi_vf_mode(mm->dev->adev)) { + struct v9_mqd *m; + + m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr; + m->cp_hqd_pq_doorbell_control |= 1 << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT; + } +} static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout, uint32_t pipe_id, uint32_t queue_id) @@ -883,7 +903,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->is_occupied = kfd_is_occupied_cp; mqd->get_checkpoint_info = get_checkpoint_info; mqd->checkpoint_mqd = checkpoint_mqd; - mqd->restore_mqd = restore_mqd; mqd->mqd_size = sizeof(struct v9_mqd); mqd->mqd_stride = mqd_stride_v9; #if defined(CONFIG_DEBUG_FS) @@ -895,12 +914,14 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_v9_4_3; mqd->load_mqd = load_mqd_v9_4_3; mqd->update_mqd = update_mqd_v9_4_3; + mqd->restore_mqd = restore_mqd_v9_4_3; mqd->destroy_mqd = destroy_mqd_v9_4_3; mqd->get_wave_state = get_wave_state_v9_4_3; } else { mqd->init_mqd = init_mqd; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd; + mqd->restore_mqd = restore_mqd; mqd->destroy_mqd = kfd_destroy_mqd_cp; mqd->get_wave_state = get_wave_state; } From a4b6e990d788ba0835476a9a9b0dfad01113ed32 Mon Sep 17 00:00:00 2001 From: ganglxie Date: Tue, 11 Mar 2025 18:35:44 +0800 Subject: [PATCH 62/68] drm/amdgpu: Save PA of bad pages for old asics for old asics that do not support mca translating, we just save PA for them Signed-off-by: ganglxie Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 24 ++++++++++++++++--- .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 9 +++++-- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 285e3aa2bb2f..7cf8a3036828 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2836,6 +2836,13 @@ static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev, save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; + /*old asics just have pa in eeprom*/ + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) { + memcpy(err_data->err_addr, bps, + sizeof(struct eeprom_table_record) * adev->umc.retire_unit); + goto out; + } + for (i = 0; i < adev->umc.retire_unit; i++) bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); @@ -2858,6 +2865,7 @@ static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev, } } +out: return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit); } @@ -2981,14 +2989,24 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, /* only new entries are saved */ if (save_count > 0) { - for (i = 0; i < unit_num; i++) { + /*old asics only save pa to eeprom like before*/ + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) { if (amdgpu_ras_eeprom_append(control, - &data->bps[bad_page_num + i * adev->umc.retire_unit], - 1)) { + &data->bps[bad_page_num], save_count)) { dev_err(adev->dev, "Failed to save EEPROM table data!"); return -EIO; } + } else { + for (i = 0; i < unit_num; i++) { + if (amdgpu_ras_eeprom_append(control, + &data->bps[bad_page_num + + i * adev->umc.retire_unit], 1)) { + dev_err(adev->dev, "Failed to save EEPROM table data!"); + return -EIO; + } + } } + dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 09a6f8bc1a5a..3597ecd9baca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -727,9 +727,14 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, - control->ras_fri) % control->ras_max_record_count; - control->ras_num_mca_recs += num; - control->ras_num_bad_pages += num * adev->umc.retire_unit; + /*old asics only save pa to eeprom like before*/ + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) + control->ras_num_pa_recs += num; + else + control->ras_num_mca_recs += num; + control->ras_num_bad_pages = control->ras_num_pa_recs + + control->ras_num_mca_recs * adev->umc.retire_unit; Out: kfree(buf); return res; From 0c7e053448945e5a4379dc4396c762d7422b11ca Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Wed, 12 Mar 2025 21:14:43 -0400 Subject: [PATCH 63/68] drm/amdkfd: Correct F8_MODE for gfx950 Correct F8_MODE setting for gfx950 that was removed Fixes: 61972cd93af7 ("drm/amdkfd: Set per-process flags only once for gfx9/10/11/12") Signed-off-by: Amber Lin Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index d794c8172b40..9fcc8c6e57b7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -71,8 +71,7 @@ static bool set_cache_memory_policy_v9(struct device_queue_manager *dqm, qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) || - KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) + KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4)) qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT); if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) { From 13c13bdd1b014eb8261326fe1d62cb4675f3c795 Mon Sep 17 00:00:00 2001 From: Xiang Liu Date: Fri, 28 Feb 2025 14:56:45 +0800 Subject: [PATCH 64/68] drm/amdgpu: Enable ACA by default for psp v13_0_6/v13_0_14 Enable ACA by default for psp v13_0_6/v13_0_14. Signed-off-by: Xiang Liu Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 7cf8a3036828..cfec29835634 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -3785,9 +3785,11 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : adev->ras_hw_enabled & amdgpu_ras_mask; - /* aca is disabled by default except for psp v13_0_12 */ + /* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */ adev->aca.is_enabled = - (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12)); + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); /* bad page feature is not applicable to specific app platform */ if (adev->gmc.is_app_apu && From f81cd793119e7f4b426a825435d49cc10a081c7a Mon Sep 17 00:00:00 2001 From: Shaoyun Liu Date: Mon, 10 Mar 2025 12:38:12 -0400 Subject: [PATCH 65/68] drm/amd/amdgpu: Fix MES init sequence When MES is been used , the set_hw_resource_1 API is required to initialize MES internal context correctly Signed-off-by: Shaoyun Liu Acked-by: Alex Deucher Reviewed-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 6 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 9 ++-- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 59 ++++++++++++------------ drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 43 ++++++++--------- 4 files changed, 57 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 68d640aaa2e1..da2c9a8cb3e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -143,9 +143,9 @@ struct amdgpu_mes { const struct amdgpu_mes_funcs *funcs; /* mes resource_1 bo*/ - struct amdgpu_bo *resource_1; - uint64_t resource_1_gpu_addr; - void *resource_1_addr; + struct amdgpu_bo *resource_1[AMDGPU_MAX_MES_PIPES]; + uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES]; + void *resource_1_addr[AMDGPU_MAX_MES_PIPES]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index ab7e73d0e7b1..0bb8cbe0dcc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -614,10 +614,11 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->decode_usage = 0; vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr; - vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr; - - if (adev->mes.resource_1) { - vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size; + if (amdgpu_sriov_is_mes_info_enable(adev)) { + vf2pf_info->mes_info_addr = + (uint64_t)(adev->mes.resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE); + vf2pf_info->mes_info_size = + adev->mes.resource_1[0]->tbo.base.size - AMDGPU_GPU_PAGE_SIZE; } vf2pf_info->checksum = amd_sriov_msg_checksum( diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 7eee41187b7c..e65916ada23b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -740,10 +740,13 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_pkt.enable_mes_info_ctx = 1; - mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr; - mes_set_hw_res_pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE; - mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr = - mes->resource_1_gpu_addr + MES11_HW_RESOURCE_1_SIZE; + + mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[0]; + if (amdgpu_sriov_is_mes_info_enable(mes->adev)) { + mes_set_hw_res_pkt.mes_info_ctx_mc_addr = + mes->resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE; + mes_set_hw_res_pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE; + } return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), @@ -1381,7 +1384,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - int pipe, r; + int pipe, r, bo_size; adev->mes.funcs = &mes_v11_0_funcs; adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init; @@ -1416,19 +1419,21 @@ static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (amdgpu_sriov_is_mes_info_enable(adev) || - adev->gfx.enable_cleaner_shader) { - r = amdgpu_bo_create_kernel(adev, - MES11_HW_RESOURCE_1_SIZE + AMDGPU_GPU_PAGE_SIZE, - PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->mes.resource_1, - &adev->mes.resource_1_gpu_addr, - &adev->mes.resource_1_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r); - return r; - } + bo_size = AMDGPU_GPU_PAGE_SIZE; + if (amdgpu_sriov_is_mes_info_enable(adev)) + bo_size += MES11_HW_RESOURCE_1_SIZE; + + /* Only needed for AMDGPU_MES_SCHED_PIPE on MES 11*/ + r = amdgpu_bo_create_kernel(adev, + bo_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.resource_1[0], + &adev->mes.resource_1_gpu_addr[0], + &adev->mes.resource_1_addr[0]); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r); + return r; } return 0; @@ -1439,11 +1444,8 @@ static int mes_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe; - if (amdgpu_sriov_is_mes_info_enable(adev) || - adev->gfx.enable_cleaner_shader) { - amdgpu_bo_free_kernel(&adev->mes.resource_1, &adev->mes.resource_1_gpu_addr, - &adev->mes.resource_1_addr); - } + amdgpu_bo_free_kernel(&adev->mes.resource_1[0], &adev->mes.resource_1_gpu_addr[0], + &adev->mes.resource_1_addr[0]); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1632,13 +1634,10 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - if (amdgpu_sriov_is_mes_info_enable(adev) || - adev->gfx.enable_cleaner_shader) { - r = mes_v11_0_set_hw_resources_1(&adev->mes); - if (r) { - DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); - goto failure; - } + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; } r = mes_v11_0_query_sched_status(&adev->mes); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index fdc435b62012..183dd3346da5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -687,7 +687,7 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 0xa; mes_set_hw_res_1_pkt.cleaner_shader_fence_mc_addr = - mes->resource_1_gpu_addr; + mes->resource_1_gpu_addr[pipe]; return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), @@ -1519,23 +1519,22 @@ static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) { r = mes_v12_0_kiq_ring_init(adev); - else + } + else { r = mes_v12_0_ring_init(adev, pipe); - if (r) - return r; - } - - if (adev->enable_uni_mes) { - r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->mes.resource_1, - &adev->mes.resource_1_gpu_addr, - &adev->mes.resource_1_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r); - return r; + if (r) + return r; + r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.resource_1[pipe], + &adev->mes.resource_1_gpu_addr[pipe], + &adev->mes.resource_1_addr[pipe]); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo pipe[%d]\n", r, pipe); + return r; + } } } @@ -1547,12 +1546,11 @@ static int mes_v12_0_sw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe; - if (adev->enable_uni_mes) - amdgpu_bo_free_kernel(&adev->mes.resource_1, - &adev->mes.resource_1_gpu_addr, - &adev->mes.resource_1_addr); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + amdgpu_bo_free_kernel(&adev->mes.resource_1[pipe], + &adev->mes.resource_1_gpu_addr[pipe], + &adev->mes.resource_1_addr[pipe]); + kfree(adev->mes.mqd_backup[pipe]); amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe], @@ -1751,8 +1749,7 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - if (adev->enable_uni_mes) - mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); From 42d9d7bed270247f134190ba0cb05bbd072f58c2 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 5 Feb 2025 10:06:38 -0300 Subject: [PATCH 66/68] drm/amd/display: avoid NPD when ASIC does not support DMUB ctx->dmub_srv will de NULL if the ASIC does not support DMUB, which is tested in dm_dmub_sw_init. However, it will be dereferenced in dmub_hw_lock_mgr_cmd if should_use_dmub_lock returns true. This has been the case since dmub support has been added for PSR1. Fix this by checking for dmub_srv in should_use_dmub_lock. [ 37.440832] BUG: kernel NULL pointer dereference, address: 0000000000000058 [ 37.447808] #PF: supervisor read access in kernel mode [ 37.452959] #PF: error_code(0x0000) - not-present page [ 37.458112] PGD 0 P4D 0 [ 37.460662] Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI [ 37.465553] CPU: 2 UID: 1000 PID: 1745 Comm: DrmThread Not tainted 6.14.0-rc1-00003-gd62e938120f0 #23 99720e1cb1e0fc4773b8513150932a07de3c6e88 [ 37.478324] Hardware name: Google Morphius/Morphius, BIOS Google_Morphius.13434.858.0 10/26/2023 [ 37.487103] RIP: 0010:dmub_hw_lock_mgr_cmd+0x77/0xb0 [ 37.492074] Code: 44 24 0e 00 00 00 00 48 c7 04 24 45 00 00 0c 40 88 74 24 0d 0f b6 02 88 44 24 0c 8b 01 89 44 24 08 85 f6 75 05 c6 44 24 0e 01 <48> 8b 7f 58 48 89 e6 ba 01 00 00 00 e8 08 3c 2a 00 65 48 8b 04 5 [ 37.510822] RSP: 0018:ffff969442853300 EFLAGS: 00010202 [ 37.516052] RAX: 0000000000000000 RBX: ffff92db03000000 RCX: ffff969442853358 [ 37.523185] RDX: ffff969442853368 RSI: 0000000000000001 RDI: 0000000000000000 [ 37.530322] RBP: 0000000000000001 R08: 00000000000004a7 R09: 00000000000004a5 [ 37.537453] R10: 0000000000000476 R11: 0000000000000062 R12: ffff92db0ade8000 [ 37.544589] R13: ffff92da01180ae0 R14: ffff92da011802a8 R15: ffff92db03000000 [ 37.551725] FS: 0000784a9cdfc6c0(0000) GS:ffff92db2af00000(0000) knlGS:0000000000000000 [ 37.559814] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 37.565562] CR2: 0000000000000058 CR3: 0000000112b1c000 CR4: 00000000003506f0 [ 37.572697] Call Trace: [ 37.575152] [ 37.577258] ? __die_body+0x66/0xb0 [ 37.580756] ? page_fault_oops+0x3e7/0x4a0 [ 37.584861] ? exc_page_fault+0x3e/0xe0 [ 37.588706] ? exc_page_fault+0x5c/0xe0 [ 37.592550] ? asm_exc_page_fault+0x22/0x30 [ 37.596742] ? dmub_hw_lock_mgr_cmd+0x77/0xb0 [ 37.601107] dcn10_cursor_lock+0x1e1/0x240 [ 37.605211] program_cursor_attributes+0x81/0x190 [ 37.609923] commit_planes_for_stream+0x998/0x1ef0 [ 37.614722] update_planes_and_stream_v2+0x41e/0x5c0 [ 37.619703] dc_update_planes_and_stream+0x78/0x140 [ 37.624588] amdgpu_dm_atomic_commit_tail+0x4362/0x49f0 [ 37.629832] ? srso_return_thunk+0x5/0x5f [ 37.633847] ? mark_held_locks+0x6d/0xd0 [ 37.637774] ? _raw_spin_unlock_irq+0x24/0x50 [ 37.642135] ? srso_return_thunk+0x5/0x5f [ 37.646148] ? lockdep_hardirqs_on+0x95/0x150 [ 37.650510] ? srso_return_thunk+0x5/0x5f [ 37.654522] ? _raw_spin_unlock_irq+0x2f/0x50 [ 37.658883] ? srso_return_thunk+0x5/0x5f [ 37.662897] ? wait_for_common+0x186/0x1c0 [ 37.666998] ? srso_return_thunk+0x5/0x5f [ 37.671009] ? drm_crtc_next_vblank_start+0xc3/0x170 [ 37.675983] commit_tail+0xf5/0x1c0 [ 37.679478] drm_atomic_helper_commit+0x2a2/0x2b0 [ 37.684186] drm_atomic_commit+0xd6/0x100 [ 37.688199] ? __cfi___drm_printfn_info+0x10/0x10 [ 37.692911] drm_atomic_helper_update_plane+0xe5/0x130 [ 37.698054] drm_mode_cursor_common+0x501/0x670 [ 37.702600] ? __cfi_drm_mode_cursor_ioctl+0x10/0x10 [ 37.707572] drm_mode_cursor_ioctl+0x48/0x70 [ 37.711851] drm_ioctl_kernel+0xf2/0x150 [ 37.715781] drm_ioctl+0x363/0x590 [ 37.719189] ? __cfi_drm_mode_cursor_ioctl+0x10/0x10 [ 37.724165] amdgpu_drm_ioctl+0x41/0x80 [ 37.728013] __se_sys_ioctl+0x7f/0xd0 [ 37.731685] do_syscall_64+0x87/0x100 [ 37.735355] ? vma_end_read+0x12/0xe0 [ 37.739024] ? srso_return_thunk+0x5/0x5f [ 37.743041] ? find_held_lock+0x47/0xf0 [ 37.746884] ? vma_end_read+0x12/0xe0 [ 37.750552] ? srso_return_thunk+0x5/0x5f [ 37.754565] ? lock_release+0x1c4/0x2e0 [ 37.758406] ? vma_end_read+0x12/0xe0 [ 37.762079] ? exc_page_fault+0x84/0xe0 [ 37.765921] ? srso_return_thunk+0x5/0x5f [ 37.769938] ? lockdep_hardirqs_on+0x95/0x150 [ 37.774303] ? srso_return_thunk+0x5/0x5f [ 37.778317] ? exc_page_fault+0x84/0xe0 [ 37.782163] entry_SYSCALL_64_after_hwframe+0x55/0x5d [ 37.787218] RIP: 0033:0x784aa5ec3059 [ 37.790803] Code: 04 25 28 00 00 00 48 89 45 c8 31 c0 48 8d 45 10 c7 45 b0 10 00 00 00 48 89 45 b8 48 8d 45 d0 48 89 45 c0 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 77 1d 48 8b 45 c8 64 48 2b 04 25 28 00 0 [ 37.809553] RSP: 002b:0000784a9cdf90e0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 37.817121] RAX: ffffffffffffffda RBX: 0000784a9cdf917c RCX: 0000784aa5ec3059 [ 37.824256] RDX: 0000784a9cdf917c RSI: 00000000c01c64a3 RDI: 0000000000000020 [ 37.831391] RBP: 0000784a9cdf9130 R08: 0000000000000100 R09: 0000000000ff0000 [ 37.838525] R10: 0000000000000000 R11: 0000000000000246 R12: 0000025c01606ed0 [ 37.845657] R13: 0000025c00030200 R14: 00000000c01c64a3 R15: 0000000000000020 [ 37.852799] [ 37.854992] Modules linked in: [ 37.864546] gsmi: Log Shutdown Reason 0x03 [ 37.868656] CR2: 0000000000000058 [ 37.871979] ---[ end trace 0000000000000000 ]--- [ 37.880976] RIP: 0010:dmub_hw_lock_mgr_cmd+0x77/0xb0 [ 37.885954] Code: 44 24 0e 00 00 00 00 48 c7 04 24 45 00 00 0c 40 88 74 24 0d 0f b6 02 88 44 24 0c 8b 01 89 44 24 08 85 f6 75 05 c6 44 24 0e 01 <48> 8b 7f 58 48 89 e6 ba 01 00 00 00 e8 08 3c 2a 00 65 48 8b 04 5 [ 37.904703] RSP: 0018:ffff969442853300 EFLAGS: 00010202 [ 37.909933] RAX: 0000000000000000 RBX: ffff92db03000000 RCX: ffff969442853358 [ 37.917068] RDX: ffff969442853368 RSI: 0000000000000001 RDI: 0000000000000000 [ 37.924201] RBP: 0000000000000001 R08: 00000000000004a7 R09: 00000000000004a5 [ 37.931336] R10: 0000000000000476 R11: 0000000000000062 R12: ffff92db0ade8000 [ 37.938469] R13: ffff92da01180ae0 R14: ffff92da011802a8 R15: ffff92db03000000 [ 37.945602] FS: 0000784a9cdfc6c0(0000) GS:ffff92db2af00000(0000) knlGS:0000000000000000 [ 37.953689] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 37.959435] CR2: 0000000000000058 CR3: 0000000112b1c000 CR4: 00000000003506f0 [ 37.966570] Kernel panic - not syncing: Fatal exception [ 37.971901] Kernel Offset: 0x30200000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 37.982840] gsmi: Log Shutdown Reason 0x02 Fixes: b5c764d6ed55 ("drm/amd/display: Use HW lock mgr for PSR1") Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Sun peng Li Cc: Tom Chung Cc: Daniel Wheeler Cc: Alex Deucher Reviewed-by: Rodrigo Siqueira Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index bf636b28e3e1..a262598aa676 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -63,6 +63,10 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, bool should_use_dmub_lock(struct dc_link *link) { + /* ASIC doesn't support DMUB */ + if (!link->ctx->dmub_srv) + return false; + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) return true; From ebdc52607a46cda08972888178c6aa9cd6965141 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Wed, 12 Mar 2025 14:31:06 +0800 Subject: [PATCH 67/68] drm/amdgpu/gfx12: correct cleanup of 'me' field with gfx_v12_0_me_fini() In gfx_v12_0_cp_gfx_load_me_microcode_rs64(), gfx_v12_0_pfp_fini() is incorrectly used to free 'me' field of 'gfx', since gfx_v12_0_pfp_fini() can only release 'pfp' field of 'gfx'. The release function of 'me' field should be gfx_v12_0_me_fini(). Fixes: 52cb80c12e8a ("drm/amdgpu: Add gfx v12_0 ip block support (v6)") Signed-off-by: Wentao Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 87dc4195192c..da364e04f09c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -2455,7 +2455,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) (void **)&adev->gfx.me.me_fw_data_ptr); if (r) { dev_err(adev->dev, "(%d) failed to create me data bo\n", r); - gfx_v12_0_pfp_fini(adev); + gfx_v12_0_me_fini(adev); return r; } From eb6cdfb807d038d9b9986b5c87188f28a4071eae Mon Sep 17 00:00:00 2001 From: David Belanger Date: Tue, 2 Jul 2024 17:56:41 -0400 Subject: [PATCH 68/68] drm/amdgpu: Restore uncached behaviour on GFX12 Always use MTYPE_UC if UNCACHED flag is specified. This makes kernarg region uncached and it restores usermode cache disable debug flag functionality. Do not set MTYPE_UC for COHERENT flag, on GFX12 coherence is handled by shader code. Signed-off-by: David Belanger Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 22 ++-------------------- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 +------- 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index bf8d01da8815..05c026d0b0d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -501,9 +501,6 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, uint64_t *flags) { struct amdgpu_bo *bo = mapping->bo_va->base.bo; - struct amdgpu_device *bo_adev; - bool coherent, is_system; - *flags &= ~AMDGPU_PTE_EXECUTABLE; *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; @@ -519,26 +516,11 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_VALID; } - if (!bo) - return; - - if (bo->flags & (AMDGPU_GEM_CREATE_COHERENT | - AMDGPU_GEM_CREATE_UNCACHED)) - *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); - - bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); - coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; - is_system = bo->tbo.resource && - (bo->tbo.resource->mem_type == TTM_PL_TT || - bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); - if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) *flags |= AMDGPU_PTE_DCC; - /* WA for HW bug */ - if (is_system || ((bo_adev != adev) && coherent)) - *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); - + if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED) + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); } static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 63eeef2a75ba..100717a98ec1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1287,13 +1287,7 @@ svm_range_get_pte_flags(struct kfd_node *node, break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): - if (domain == SVM_RANGE_VRAM_DOMAIN) { - if (bo_node != node) - mapping_flags |= AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } + mapping_flags |= AMDGPU_VM_MTYPE_NC; break; default: mapping_flags |= coherent ?