From c3877608db1a55d5b1b6f163d96149fb108d9efd Mon Sep 17 00:00:00 2001
From: Kenneth Feng
Date: Thu, 11 Aug 2022 09:48:00 +0800
Subject: [PATCH 01/56] drm/amd/amdgpu: add ih cg and hdp sd on smu_v13_0_7

Add IH CG and HDP SD support on smu_v13_0_7.

Signed-off-by: Kenneth Feng
Reviewed-by: Hawking Zhang
---
 drivers/gpu/drm/amd/amdgpu/soc21.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 77d549dbe2a8c..71e184ca60045 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -577,7 +577,9 @@ static int soc21_common_early_init(void *handle)
 		AMD_CG_SUPPORT_VCN_MGCG |
 		AMD_CG_SUPPORT_JPEG_MGCG |
 		AMD_CG_SUPPORT_ATHUB_MGCG |
-		AMD_CG_SUPPORT_ATHUB_LS;
+		AMD_CG_SUPPORT_ATHUB_LS |
+		AMD_CG_SUPPORT_IH_CG |
+		AMD_CG_SUPPORT_HDP_SD;
 	adev->pg_flags = AMD_PG_SUPPORT_VCN |
 		AMD_PG_SUPPORT_VCN_DPG |
@@ -692,6 +694,7 @@ static int soc21_common_set_clockgating_state(void *handle,
 	switch (adev->ip_versions[NBIO_HWIP][0]) {
 	case IP_VERSION(4, 3, 0):
+	case IP_VERSION(4, 3, 1):
 		adev->nbio.funcs->update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE);
 		adev->nbio.funcs->update_medium_grain_light_sleep(adev,

From 5cb5feb355becde250b5171bfe38e148b4e1ef54 Mon Sep 17 00:00:00 2001
From: Kenneth Feng
Date: Thu, 11 Aug 2022 16:38:52 +0800
Subject: [PATCH 02/56] drm/amd/pm: add mode1 support on smu_v13_0_7

Add mode1 reset support, since it is missing on smu_v13_0_7.

Signed-off-by: Kenneth Feng
Reviewed-by: Yang Wang
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index d78375d9a1414..1016d1c216d8c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -1567,6 +1567,16 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
 	return ret;
 }
+static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	/* SRIOV does not support SMU mode1 reset */
+	if (amdgpu_sriov_vf(adev))
+		return false;
+
+	return true;
+}
 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
 	.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -1626,6 +1636,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.baco_set_state = smu_v13_0_baco_set_state,
 	.baco_enter = smu_v13_0_baco_enter,
 	.baco_exit = smu_v13_0_baco_exit,
+	.mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
+	.mode1_reset = smu_v13_0_mode1_reset,
 	.set_mp1_state = smu_v13_0_7_set_mp1_state,
 };

From 5aad93da92f5bc0d4bd2b62efc9dc1050286a137 Mon Sep 17 00:00:00 2001
From: Asher Song
Date: Fri, 12 Aug 2022 16:10:19 +0800
Subject: [PATCH 03/56] drm/amdkcl: test whether struct drm_dp_mst_port has
 member passthrough_aux

This compatibility test is needed because of the following commits:
0087990a9f572c6dd9533c973fe1072458f54b7a
"drm/amd/display: consider DSC pass-through during mode validation"
99d08a5d1ad7fb76b33aabae46cd88bc7e6e6df4
"drm/amd/display: implement DSC pass-through support"

Signed-off-by: Asher Song
Reviewed-by: Flora Cui
---
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 13 +++++++++--
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   |  6 ++++-
 drivers/gpu/drm/amd/dkms/config/config.h      |  3 +++
 .../gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4    | 23 +++++++++++++++++++
drivers/gpu/drm/amd/dkms/m4/kernel.m4 | 1 + 5 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4 diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 24bd6b469a00e..191b1f0fd3452 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -818,14 +818,18 @@ bool dm_helpers_dp_write_dsc_enable( const struct dc_stream_state *stream, bool enable) { +#if defined(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX) static const uint8_t DSC_DISABLE; static const uint8_t DSC_DECODING = 0x01; static const uint8_t DSC_PASSTHROUGH = 0x02; - struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_port *port; uint8_t enable_dsc = enable ? DSC_DECODING : DSC_DISABLE; uint8_t enable_passthrough = enable ? DSC_PASSTHROUGH : DSC_DISABLE; +#else + uint8_t enable_dsc = enable ? 1 : 0; +#endif + struct amdgpu_dm_connector *aconnector; uint8_t ret = 0; if (!stream) @@ -844,7 +848,7 @@ bool dm_helpers_dp_write_dsc_enable( return write_dsc_enable_synaptics_non_virtual_dpcd_mst( aconnector->dsc_aux, stream, enable_dsc); #endif - +#if defined(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX) port = aconnector->port; if (enable) { @@ -878,6 +882,11 @@ bool dm_helpers_dp_write_dsc_enable( ret); } } +#else + ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); + DC_LOG_DC("Send DSC %s to MST RX\n", enable_dsc ? "enable" : "disable"); +#endif + } if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_EDP) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 1b9ccce0a78e4..325fef27e8ec9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1602,12 +1602,13 @@ enum dc_status dm_dp_mst_is_port_support_mode( struct amdgpu_dm_connector *aconnector, struct dc_stream_state *stream) { + int bpp, pbn, branch_max_throughput_mps = 0; +#ifdef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX struct dc_link_settings cur_link_settings; unsigned int end_to_end_bw_in_kbps = 0; unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; unsigned int max_compressed_bw_in_kbps = 0; struct dc_dsc_bw_range bw_range = {0}; - int bpp, pbn, branch_max_throughput_mps = 0; /* * check if the mode could be supported if DSC pass-through is supported @@ -1642,13 +1643,16 @@ enum dc_status dm_dp_mst_is_port_support_mode( return DC_FAIL_BANDWIDTH_VALIDATE; } } else { +#endif /* check if mode could be supported within full_pbn */ bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); if (pbn > aconnector->port->full_pbn) return DC_FAIL_BANDWIDTH_VALIDATE; +#ifdef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX } +#endif /* check is mst dsc output bandwidth branch_overall_throughput_0_mps */ switch (stream->timing.pixel_encoding) { diff --git a/drivers/gpu/drm/amd/dkms/config/config.h b/drivers/gpu/drm/amd/dkms/config/config.h index 58cffa4328dd7..28806304010c5 100644 --- a/drivers/gpu/drm/amd/dkms/config/config.h +++ b/drivers/gpu/drm/amd/dkms/config/config.h @@ -402,6 +402,9 @@ /* drm_dp_mst_{get,put}_port_malloc() is available */ #define HAVE_DRM_DP_MST_GET_PUT_PORT_MALLOC 1 +/* struct drm_dp_mst_port has passthrough_aux member */ +/* #undef 
HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX */
+
 /* struct drm_dp_mst_topology_cbs->destroy_connector is available */
 /* #undef HAVE_DRM_DP_MST_TOPOLOGY_CBS_DESTROY_CONNECTOR */

diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4 b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4
new file mode 100644
index 0000000000000..a1f26ca53e149
--- /dev/null
+++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4
@@ -0,0 +1,23 @@
+dnl #
+dnl # commit v5.18-2579-g3af4b1f1d6e7
+dnl # "drm/dp_mst: add passthrough_aux to struct drm_dp_mst_port"
+dnl
+AC_DEFUN([AC_AMDGPU_DRM_DP_MST_POST_PASSTHROUGH_AUX], [
+	AC_KERNEL_DO_BACKGROUND([
+		AC_KERNEL_TRY_COMPILE([
+			#if defined(HAVE_DRM_DISPLAY_DRM_DP_MST_HELPER_H)
+			#include <drm/display/drm_dp_mst_helper.h>
+			#elif defined(HAVE_DRM_DP_DRM_DP_MST_HELPER_H)
+			#include <drm/dp/drm_dp_mst_helper.h>
+			#else
+			#include <drm/drm_dp_mst_helper.h>
+			#endif
+		], [
+			struct drm_dp_mst_port *dp_mst_port = NULL;
+			dp_mst_port->passthrough_aux = NULL;
+		], [
+			AC_DEFINE(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX, 1,
+				[struct drm_dp_mst_port has passthrough_aux member])
+		])
+	])
+])
diff --git a/drivers/gpu/drm/amd/dkms/m4/kernel.m4 b/drivers/gpu/drm/amd/dkms/m4/kernel.m4
index 3bcd090500e66..42ee5087cd7b1 100644
--- a/drivers/gpu/drm/amd/dkms/m4/kernel.m4
+++ b/drivers/gpu/drm/amd/dkms/m4/kernel.m4
@@ -282,6 +282,7 @@ AC_DEFUN([AC_CONFIG_KERNEL], [
 		AC_AMDGPU_DRM_GEM_OBJECT_FUNCS_VMAP_HAS_IOSYS_MAP_ARG
 		AC_AMDGPU_DRM_FB_HELPER_BUFFER
 		AC_AMDGPU_DRM_DP_MST_TOPOLOGY_MGR_BASE
+		AC_AMDGPU_DRM_DP_MST_POST_PASSTHROUGH_AUX
 	AC_KERNEL_WAIT
 	AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
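A note on how the DKMS probe above works, for readers new to these m4 tests: AC_KERNEL_TRY_COMPILE builds a small throwaway translation unit against the target kernel's headers; if it compiles, the HAVE_ macro is defined in config.h and the driver code branches on it at build time. Conceptually, the passthrough_aux probe boils down to the C below (a minimal sketch; the function wrapper is an assumption, since the real test harness is generated by the m4 machinery):

	/* If this compiles against the target kernel's headers, struct
	 * drm_dp_mst_port has a passthrough_aux member and config.h gets
	 * HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX. The probe is only compiled,
	 * never run, so the NULL dereference is harmless; it exists purely
	 * to reference the member.
	 */
	#include <drm/display/drm_dp_mst_helper.h>

	static int probe_passthrough_aux(void)
	{
		struct drm_dp_mst_port *dp_mst_port = NULL;

		dp_mst_port->passthrough_aux = NULL;
		return 0;
	}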
From 22ed845037ec1fb0ea6fa7c2d9df7e3f3984ad18 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Mon, 8 Aug 2022 17:00:38 -0400
Subject: [PATCH 05/56] drm/amdkfd: Fix mm reference in SVM eviction worker

Use the mm reference from the fence. This allows removing the
svm_bo->svms pointer, which was problematic because we cannot assume
that the struct kfd_process containing the svms is still allocated
without holding a refcount on the process.

Use mmget_not_zero to ensure the mm is still valid, and drop the
svm_bo reference if it isn't.

Signed-off-by: Felix Kuehling
Reviewed-by: Philip Yang
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 +++++++----------
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 -
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 4b9d2a15fb085..e5708de28ff3b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -545,7 +545,6 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
 		kfree(svm_bo);
 		return -ESRCH;
 	}
-	svm_bo->svms = prange->svms;
 	svm_bo->eviction_fence =
 		amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
 					   mm,
@@ -3277,7 +3276,6 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
 static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 {
 	struct svm_range_bo *svm_bo;
-	struct kfd_process *p;
 	struct mm_struct *mm;
 	int r = 0;
@@ -3285,13 +3283,12 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 	if (!svm_bo_ref_unless_zero(svm_bo))
 		return; /* svm_bo was freed while eviction was pending */
-	/* svm_range_bo_release destroys this worker thread. So during
-	 * the lifetime of this thread, kfd_process and mm will be valid.
-	 */
-	p = container_of(svm_bo->svms, struct kfd_process, svms);
-	mm = p->mm;
-	if (!mm)
+	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+		mm = svm_bo->eviction_fence->mm;
+	} else {
+		svm_range_bo_unref(svm_bo);
 		return;
+	}
 	mmap_read_lock(mm);
 	spin_lock(&svm_bo->list_lock);
@@ -3309,8 +3306,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 		mutex_lock(&prange->migrate_mutex);
 		do {
-			r = svm_migrate_vram_to_ram(prange,
-						svm_bo->eviction_fence->mm,
+			r = svm_migrate_vram_to_ram(prange, mm,
 						KFD_MIGRATE_TRIGGER_TTM_EVICTION);
 		} while (!r && prange->actual_loc && --retries);
@@ -3328,6 +3324,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 	}
 	spin_unlock(&svm_bo->list_lock);
 	mmap_read_unlock(mm);
+	mmput(mm);
 	dma_fence_signal(&svm_bo->eviction_fence->base);

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 9156b041ef175..cfac13ad06ef0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -46,7 +46,6 @@ struct svm_range_bo {
 	spinlock_t list_lock;
 	struct amdgpu_amdkfd_fence *eviction_fence;
 	struct work_struct eviction_work;
-	struct svm_range_list *svms;
 	uint32_t evicting;
 	struct work_struct release_work;
 };
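The lifetime rule patch 05 relies on is easy to get wrong, so a standalone sketch may help: a deferred worker must not touch an mm_struct without first taking its own temporary reference, and every successful mmget_not_zero() must be paired with an mmput(). The worker below is hypothetical (the name and the candidate_mm parameter are invented for illustration); only mmget_not_zero(), mmput() and the mmap locking calls are the real kernel API:

	#include <linux/mm.h>
	#include <linux/sched/mm.h>	/* mmget_not_zero(), mmput() */

	/* Hypothetical worker showing the take-a-reference-or-bail pattern
	 * used by svm_range_evict_svm_bo_worker() above.
	 */
	static void example_evict_worker(struct mm_struct *candidate_mm)
	{
		struct mm_struct *mm;

		/* The owning process may be exiting; mmget_not_zero() fails
		 * once mm_users has already dropped to zero.
		 */
		if (!mmget_not_zero(candidate_mm))
			return;	/* drop any refs we hold and bail */
		mm = candidate_mm;

		mmap_read_lock(mm);	/* safe: we hold an mm reference */
		/* ... migrate ranges, signal fences, etc. ... */
		mmap_read_unlock(mm);

		mmput(mm);	/* pairs with the mmget_not_zero() above */
	}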
From 7224e4c5325751a02c1ee6b6f27bd988c8b3e556 Mon Sep 17 00:00:00 2001
From: shaoyunl
Date: Fri, 5 Aug 2022 11:47:20 -0400
Subject: [PATCH 06/56] drm/amdgpu: use sjt mec fw on aldebaran for sriov

The second-level jump table is required for live migration and for
multiple-VF configurations on Aldebaran. With this implemented, the
first-level jump table (the one the hardware uses) stays the same,
and the MEC firmware internally uses the second-level jump table to
jump to the real implementation, so different VFs can load different
MEC firmware versions as long as they all support SJT.

Signed-off-by: shaoyunl
Acked-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6aff376e6e188..02e86732267d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -126,6 +126,8 @@ MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
+MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");

 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03
 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
@@ -1496,7 +1498,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
 	const struct common_firmware_header *header = NULL;
 	const struct gfx_firmware_header_v1_0 *cp_hdr;

-	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
+		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
+	else
+		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
+
 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
 	if (err)
 		goto out;
@@ -1509,7 +1515,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,

 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
-		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
+			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
+		else
+			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
+
 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
 		if (!err) {
 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
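The two-level jump table described in patch 06 is easier to see in miniature. The C below is purely conceptual (every name is invented; the real MEC firmware layout is not public): the first-level table, the one the hardware jumps through, stays identical across firmware builds, while each build is free to re-point the second-level table at its own implementations, which is what lets different VFs run different MEC versions.

	/* Conceptual sketch of two-level jump-table indirection. */
	typedef void (*fw_fn)(void);

	static void dispatch_impl_v1(void)
	{
		/* version-specific implementation lives here */
	}

	/* Second-level table: rebuilt for every firmware version. */
	static fw_fn second_level[] = { dispatch_impl_v1 };

	/* First-level entry point: fixed across versions, so what the
	 * hardware jumps to never changes even when implementations move.
	 */
	static void first_level_entry(void)
	{
		second_level[0]();
	}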
From 320c3e3e2b921b5eb61321ab6416603d181b82b5 Mon Sep 17 00:00:00 2001
From: Hamza Mahfooz
Date: Thu, 11 Aug 2022 11:23:23 -0400
Subject: [PATCH 07/56] drm/amd/display: fix DSC related non-x86/PPC64
 compilation issue

Need to protect DSC code with CONFIG_DRM_AMD_DC_DCN. Fixes the
following build errors on arm64:

ERROR: modpost: "dc_dsc_get_policy_for_timing" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!
ERROR: modpost: "dc_dsc_compute_bandwidth_range" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined!

Fixes: 0087990a9f57 ("drm/amd/display: consider DSC pass-through during mode validation")
Reported-by: Anders Roxell
Signed-off-by: Hamza Mahfooz
Signed-off-by: Asher Song
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 325fef27e8ec9..c706b2d08b956 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1568,8 +1568,6 @@ bool pre_validate_dsc(struct drm_atomic_state *state,
 	return (ret == 0);
 }
-#endif
-
 static unsigned int kbps_from_pbn(unsigned int pbn)
 {
 	unsigned int kbps = pbn;
@@ -1597,12 +1595,14 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream,
 	return bw_range->max_target_bpp_x16 && bw_range->min_target_bpp_x16;
 }
+#endif /* CONFIG_DRM_AMD_DC_DCN */

 enum dc_status dm_dp_mst_is_port_support_mode(
 	struct amdgpu_dm_connector *aconnector,
 	struct dc_stream_state *stream)
 {
 	int bpp, pbn, branch_max_throughput_mps = 0;
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 #ifdef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX
 	struct dc_link_settings cur_link_settings;
 	unsigned int end_to_end_bw_in_kbps = 0;
@@ -1643,6 +1643,7 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 			return DC_FAIL_BANDWIDTH_VALIDATE;
 		}
 	} else {
+#endif
 #endif
 		/* check if mode could be supported within full_pbn */
 		bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3;
 		pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false);

 		if (pbn > aconnector->port->full_pbn)
 			return DC_FAIL_BANDWIDTH_VALIDATE;
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 #ifdef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX
 	}
+#endif
 #endif

 	/* check is mst dsc output bandwidth branch_overall_throughput_0_mps */

From 8ba3e8b8bdc87ee1e1d4b23d4a995ec5d277569d Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Thu, 28 Jul 2022 16:33:47 -0400
Subject: [PATCH 08/56] drm/amd/display: Fix a compilation failure on PowerPC
 caused by FPU code

We got a report from Stephen/Michael that the PowerPC build was failing
with the following error:

ld: drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.o uses soft float
ld: failed to merge target specific data of file drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.o

This error happened because of the function optc3_set_vrr_m_const. This
function expects a double as a parameter, in code that is not allowed to
have FPU operations. After further investigation, it became clear that
optc3_set_vrr_m_const was never invoked, so we can safely drop this
function and fix the ld issue.
Cc: Alex Deucher Cc: Melissa Wen Reported-by: Stephen Rothwell Reported-by: Michael Ellerman Signed-off-by: Rodrigo Siqueira Tested-by: Michael Ellerman Acked-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c | 8 -------- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h | 3 --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c | 1 - drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h | 2 -- 4 files changed, 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c index 6df6598d217dc..d4b488b1f9ffe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c @@ -186,14 +186,6 @@ void optc3_set_dsc_config(struct timing_generator *optc, } #endif -void optc3_set_vrr_m_const(struct timing_generator *optc, - double vtotal_avg) -{ - DC_FP_START(); - optc3_fpu_set_vrr_m_const(optc, vtotal_avg); - DC_FP_END(); -} - void optc3_set_odm_bypass(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h index 62f0ba7a3dd03..c305c69f2d134 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h @@ -329,9 +329,6 @@ void optc3_lock_doublebuffer_enable(struct timing_generator *optc); void optc3_lock_doublebuffer_disable(struct timing_generator *optc); -void optc3_set_vrr_m_const(struct timing_generator *optc, - double vtotal_avg); - void optc3_set_drr_trigger_window(struct timing_generator *optc, uint32_t window_start, uint32_t window_end); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index bd189802c7902..a222e56594e5c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -281,7 +281,6 @@ static struct timing_generator_funcs dcn32_tg_funcs = { .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, .enable_optc_clock = optc1_enable_optc_clock, - .set_vrr_m_const = optc3_set_vrr_m_const, .set_drr = optc32_set_drr, .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc3_set_vtotal_min_max, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 7e43b4403b3e3..01d46f0c1ffe4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -312,8 +312,6 @@ struct timing_generator_funcs { int group_idx, uint32_t gsl_ready_signal); void (*set_out_mux)(struct timing_generator *tg, enum otg_out_mux_dest dest); - void (*set_vrr_m_const)(struct timing_generator *optc, - double vtotal_avg); void (*set_drr_trigger_window)(struct timing_generator *optc, uint32_t window_start, uint32_t window_end); void (*set_vtotal_change_limit)(struct timing_generator *optc, From e7bfa0f33953e97bebcdc28615e31fdd3bbedd4e Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 11 Aug 2022 16:01:33 -0400 Subject: [PATCH 09/56] drm/amd/display: Remove unused code We have some old code associated with DML, which we had plans to use, but at some point, we just moved away from it. This commit removes the dml_wrapper* files since they are not used anymore. 
Reported-by: Magali Lemes Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira --- .../gpu/drm/amd/display/dc/dml/dml_wrapper.c | 1884 ----------------- .../display/dc/dml/dml_wrapper_translation.c | 284 --- 2 files changed, 2168 deletions(-) delete mode 100644 drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c delete mode 100644 drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c deleted file mode 100644 index b4b51e51fc25c..0000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c +++ /dev/null @@ -1,1884 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "resource.h" -#include "core_types.h" -#include "dsc.h" -#include "clk_mgr.h" - -#ifndef DC_LOGGER_INIT -#define DC_LOGGER_INIT -#undef DC_LOG_WARNING -#define DC_LOG_WARNING -#endif - -#define DML_WRAPPER_TRANSLATION_ -#include "dml_wrapper_translation.c" -#undef DML_WRAPPER_TRANSLATION_ - -static bool is_dual_plane(enum surface_pixel_format format) -{ - return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; -} - -static void build_clamping_params(struct dc_stream_state *stream) -{ - stream->clamping.clamping_level = CLAMPING_FULL_RANGE; - stream->clamping.c_depth = stream->timing.display_color_depth; - stream->clamping.pixel_encoding = stream->timing.pixel_encoding; -} - -static void get_pixel_clock_parameters( - const struct pipe_ctx *pipe_ctx, - struct pixel_clk_params *pixel_clk_params) -{ - const struct dc_stream_state *stream = pipe_ctx->stream; - - /*TODO: is this halved for YCbCr 420? 
in that case we might want to move - * the pixel clock normalization for hdmi up to here instead of doing it - * in pll_adjust_pix_clk - */ - pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; - pixel_clk_params->encoder_object_id = stream->link->link_enc->id; - pixel_clk_params->signal_type = pipe_ctx->stream->signal; - pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; - /* TODO: un-hardcode*/ - pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * - LINK_RATE_REF_FREQ_IN_KHZ; - pixel_clk_params->flags.ENABLE_SS = 0; - pixel_clk_params->color_depth = - stream->timing.display_color_depth; - pixel_clk_params->flags.DISPLAY_BLANKED = 1; - pixel_clk_params->flags.SUPPORT_YCBCR420 = (stream->timing.pixel_encoding == - PIXEL_ENCODING_YCBCR420); - pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { - pixel_clk_params->color_depth = COLOR_DEPTH_888; - } - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { - pixel_clk_params->requested_pix_clk_100hz = pixel_clk_params->requested_pix_clk_100hz / 2; - } - if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - pixel_clk_params->requested_pix_clk_100hz *= 2; - -} - -static void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream, - struct bit_depth_reduction_params *fmt_bit_depth) -{ - enum dc_dither_option option = stream->dither_option; - enum dc_pixel_encoding pixel_encoding = - stream->timing.pixel_encoding; - - memset(fmt_bit_depth, 0, sizeof(*fmt_bit_depth)); - - if (option == DITHER_OPTION_DEFAULT) { - switch (stream->timing.display_color_depth) { - case COLOR_DEPTH_666: - option = DITHER_OPTION_SPATIAL6; - break; - case COLOR_DEPTH_888: - option = DITHER_OPTION_SPATIAL8; - break; - case COLOR_DEPTH_101010: - option = DITHER_OPTION_SPATIAL10; - break; - default: - option = DITHER_OPTION_DISABLE; - } - } - - if (option == DITHER_OPTION_DISABLE) - return; - - if (option == DITHER_OPTION_TRUN6) { - fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 0; - } else if (option == DITHER_OPTION_TRUN8 || - option == DITHER_OPTION_TRUN8_SPATIAL6 || - option == DITHER_OPTION_TRUN8_FM6) { - fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 1; - } else if (option == DITHER_OPTION_TRUN10 || - option == DITHER_OPTION_TRUN10_SPATIAL6 || - option == DITHER_OPTION_TRUN10_SPATIAL8 || - option == DITHER_OPTION_TRUN10_FM8 || - option == DITHER_OPTION_TRUN10_FM6 || - option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { - fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; - } - - /* special case - Formatter can only reduce by 4 bits at most. - * When reducing from 12 to 6 bits, - * HW recommends we use trunc with round mode - * (if we did nothing, trunc to 10 bits would be used) - * note that any 12->10 bit reduction is ignored prior to DCE8, - * as the input was 10 bits. 
- */ - if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL6 || - option == DITHER_OPTION_FM6) { - fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; - fmt_bit_depth->flags.TRUNCATE_MODE = 1; - } - - /* spatial dither - * note that spatial modes 1-3 are never used - */ - if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL6 || - option == DITHER_OPTION_TRUN10_SPATIAL6 || - option == DITHER_OPTION_TRUN8_SPATIAL6) { - fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; - fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 0; - fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; - fmt_bit_depth->flags.RGB_RANDOM = - (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; - } else if (option == DITHER_OPTION_SPATIAL8_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL8 || - option == DITHER_OPTION_SPATIAL8_FM6 || - option == DITHER_OPTION_TRUN10_SPATIAL8 || - option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { - fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; - fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 1; - fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; - fmt_bit_depth->flags.RGB_RANDOM = - (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; - } else if (option == DITHER_OPTION_SPATIAL10_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL10 || - option == DITHER_OPTION_SPATIAL10_FM8 || - option == DITHER_OPTION_SPATIAL10_FM6) { - fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; - fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 2; - fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; - fmt_bit_depth->flags.RGB_RANDOM = - (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; - } - - if (option == DITHER_OPTION_SPATIAL6 || - option == DITHER_OPTION_SPATIAL8 || - option == DITHER_OPTION_SPATIAL10) { - fmt_bit_depth->flags.FRAME_RANDOM = 0; - } else { - fmt_bit_depth->flags.FRAME_RANDOM = 1; - } - - ////////////////////// - //// temporal dither - ////////////////////// - if (option == DITHER_OPTION_FM6 || - option == DITHER_OPTION_SPATIAL8_FM6 || - option == DITHER_OPTION_SPATIAL10_FM6 || - option == DITHER_OPTION_TRUN10_FM6 || - option == DITHER_OPTION_TRUN8_FM6 || - option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { - fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; - fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 0; - } else if (option == DITHER_OPTION_FM8 || - option == DITHER_OPTION_SPATIAL10_FM8 || - option == DITHER_OPTION_TRUN10_FM8) { - fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; - fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 1; - } else if (option == DITHER_OPTION_FM10) { - fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; - fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 2; - } - - fmt_bit_depth->pixel_encoding = pixel_encoding; -} - -/* Move this after the above function as VS complains about - * declaration issues for resource_build_bit_depth_reduction_params. 
- */ - -static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx) -{ - - get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); - - if (pipe_ctx->clock_source) - pipe_ctx->clock_source->funcs->get_pix_clk_dividers( - pipe_ctx->clock_source, - &pipe_ctx->stream_res.pix_clk_params, - &pipe_ctx->pll_settings); - - pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; - - resource_build_bit_depth_reduction_params(pipe_ctx->stream, - &pipe_ctx->stream->bit_depth_params); - build_clamping_params(pipe_ctx->stream); - - return DC_OK; -} - -bool dml_validate_dsc(struct dc *dc, struct dc_state *new_ctx) -{ - int i; - - /* Validate DSC config, dsc count validation is already done */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i]; - struct dc_stream_state *stream = pipe_ctx->stream; - struct dsc_config dsc_cfg; - struct pipe_ctx *odm_pipe; - int opp_cnt = 1; - - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) - opp_cnt++; - - /* Only need to validate top pipe */ - if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe || !stream || !stream->timing.flags.DSC) - continue; - - dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left - + stream->timing.h_border_right) / opp_cnt; - dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top - + stream->timing.v_border_bottom; - dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; - dsc_cfg.color_depth = stream->timing.display_color_depth; - dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? true : false; - dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; - dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; - - if (pipe_ctx->stream_res.dsc && !pipe_ctx->stream_res.dsc->funcs->dsc_validate_stream(pipe_ctx->stream_res.dsc, &dsc_cfg)) - return false; - } - return true; -} - -enum dc_status dml_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream) -{ - enum dc_status status = DC_OK; - struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream); - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - - - status = build_pipe_hw_param(pipe_ctx); - - return status; -} - -void dml_acquire_dsc(const struct dc *dc, - struct resource_context *res_ctx, - struct display_stream_compressor **dsc, - int pipe_idx) -{ - int i; - const struct resource_pool *pool = dc->res_pool; - struct display_stream_compressor *dsc_old = dc->current_state->res_ctx.pipe_ctx[pipe_idx].stream_res.dsc; - - ASSERT(*dsc == NULL); /* If this ASSERT fails, dsc was not released properly */ - *dsc = NULL; - - /* Always do 1-to-1 mapping when number of DSCs is same as number of pipes */ - if (pool->res_cap->num_dsc == pool->res_cap->num_opp) { - *dsc = pool->dscs[pipe_idx]; - res_ctx->is_dsc_acquired[pipe_idx] = true; - return; - } - - /* Return old DSC to avoid the need for redo it */ - if (dsc_old && !res_ctx->is_dsc_acquired[dsc_old->inst]) { - *dsc = dsc_old; - res_ctx->is_dsc_acquired[dsc_old->inst] = true; - return ; - } - - /* Find first free DSC */ - for (i = 0; i < pool->res_cap->num_dsc; i++) - if (!res_ctx->is_dsc_acquired[i]) { - *dsc = pool->dscs[i]; - res_ctx->is_dsc_acquired[i] = true; - break; - } -} - -static bool dml_split_stream_for_mpc_or_odm( - const struct dc *dc, - struct resource_context *res_ctx, - struct pipe_ctx *pri_pipe, - struct pipe_ctx *sec_pipe, - bool odm) -{ - int pipe_idx = sec_pipe->pipe_idx; - const struct 
resource_pool *pool = dc->res_pool; - - *sec_pipe = *pri_pipe; - - sec_pipe->pipe_idx = pipe_idx; - sec_pipe->plane_res.mi = pool->mis[pipe_idx]; - sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; - sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; - sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; - sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; - sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; - sec_pipe->stream_res.dsc = NULL; - if (odm) { - if (pri_pipe->next_odm_pipe) { - ASSERT(pri_pipe->next_odm_pipe != sec_pipe); - sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; - sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; - } - if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { - pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; - } - if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { - pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; - } - pri_pipe->next_odm_pipe = sec_pipe; - sec_pipe->prev_odm_pipe = pri_pipe; - ASSERT(sec_pipe->top_pipe == NULL); - - if (!sec_pipe->top_pipe) - sec_pipe->stream_res.opp = pool->opps[pipe_idx]; - else - sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; - if (sec_pipe->stream->timing.flags.DSC == 1) { - dml_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); - ASSERT(sec_pipe->stream_res.dsc); - if (sec_pipe->stream_res.dsc == NULL) - return false; - } - } else { - if (pri_pipe->bottom_pipe) { - ASSERT(pri_pipe->bottom_pipe != sec_pipe); - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; - sec_pipe->bottom_pipe->top_pipe = sec_pipe; - } - pri_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe; - - ASSERT(pri_pipe->plane_state); - } - - return true; -} - -static struct pipe_ctx *dml_find_split_pipe( - struct dc *dc, - struct dc_state *context, - int old_index) -{ - struct pipe_ctx *pipe = NULL; - int i; - - if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[old_index]; - pipe->pipe_idx = old_index; - } - - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL - && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - } - - /* - * May need to fix pipes getting tossed from 1 opp to another on flip - * Add for debugging transient underflow during topology updates: - * ASSERT(pipe); - */ - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - - return pipe; -} - -static void dml_release_dsc(struct resource_context *res_ctx, - const struct resource_pool *pool, - struct display_stream_compressor **dsc) -{ - int i; - - for (i = 0; i < pool->res_cap->num_dsc; i++) - if (pool->dscs[i] == *dsc) { - res_ctx->is_dsc_acquired[i] = false; - *dsc = NULL; - break; - } -} - -static int dml_get_num_mpc_splits(struct pipe_ctx *pipe) -{ - int mpc_split_count = 0; - struct pipe_ctx *other_pipe = pipe->bottom_pipe; - - while (other_pipe && other_pipe->plane_state == pipe->plane_state) { - mpc_split_count++; - other_pipe = other_pipe->bottom_pipe; - } - other_pipe = pipe->top_pipe; - while (other_pipe && other_pipe->plane_state == 
pipe->plane_state) { - mpc_split_count++; - other_pipe = other_pipe->top_pipe; - } - - return mpc_split_count; -} - -static bool dml_enough_pipes_for_subvp(struct dc *dc, - struct dc_state *context) -{ - int i = 0; - int num_pipes = 0; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->stream && pipe->plane_state) - num_pipes++; - } - - // Sub-VP only possible if the number of "real" pipes is - // less than or equal to half the number of available pipes - if (num_pipes * 2 > dc->res_pool->pipe_count) - return false; - - return true; -} - -static int dml_validate_apply_pipe_split_flags( - struct dc *dc, - struct dc_state *context, - int vlevel, - int *split, - bool *merge) -{ - int i, pipe_idx, vlevel_split; - int plane_count = 0; - bool force_split = false; - bool avoid_split = dc->debug.pipe_split_policy == MPC_SPLIT_AVOID; - struct vba_vars_st *v = &context->bw_ctx.dml.vba; - int max_mpc_comb = v->maxMpcComb; - - if (context->stream_count > 1) { - if (dc->debug.pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP) - avoid_split = true; - } else if (dc->debug.force_single_disp_pipe_split) - force_split = true; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - /** - * Workaround for avoiding pipe-split in cases where we'd split - * planes that are too small, resulting in splits that aren't - * valid for the scaler. - */ - if (pipe->plane_state && - (pipe->plane_state->dst_rect.width <= 16 || - pipe->plane_state->dst_rect.height <= 16 || - pipe->plane_state->src_rect.width <= 16 || - pipe->plane_state->src_rect.height <= 16)) - avoid_split = true; - - /* TODO: fix dc bugs and remove this split threshold thing */ - if (pipe->stream && !pipe->prev_odm_pipe && - (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) - ++plane_count; - } - if (plane_count > dc->res_pool->pipe_count / 2) - avoid_split = true; - - /* W/A: Mode timing with borders may not work well with pipe split, avoid for this corner case */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct dc_crtc_timing timing; - - if (!pipe->stream) - continue; - else { - timing = pipe->stream->timing; - if (timing.h_border_left + timing.h_border_right - + timing.v_border_top + timing.v_border_bottom > 0) { - avoid_split = true; - break; - } - } - } - - /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */ - if (avoid_split) { - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++) - if (v->NoOfDPP[vlevel][0][pipe_idx] == 1 && - v->ModeSupport[vlevel][0]) - break; - /* Impossible to not split this pipe */ - if (vlevel > context->bw_ctx.dml.soc.num_states) - vlevel = vlevel_split; - else - max_mpc_comb = 0; - pipe_idx++; - } - v->maxMpcComb = max_mpc_comb; - } - - /* Split loop sets which pipe should be split based on dml outputs and dc flags */ - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - int pipe_plane = v->pipe_plane[pipe_idx]; - bool split4mpc = context->stream_count == 1 && plane_count == 1 - && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4; - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - if (split4mpc || 
v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 4) - split[i] = 4; - else if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 2) - split[i] = 2; - - if ((pipe->stream->view_format == - VIEW_3D_FORMAT_SIDE_BY_SIDE || - pipe->stream->view_format == - VIEW_3D_FORMAT_TOP_AND_BOTTOM) && - (pipe->stream->timing.timing_3d_format == - TIMING_3D_FORMAT_TOP_AND_BOTTOM || - pipe->stream->timing.timing_3d_format == - TIMING_3D_FORMAT_SIDE_BY_SIDE)) - split[i] = 2; - if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { - split[i] = 2; - v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1; - } - if (dc->debug.force_odm_combine_4to1 & (1 << pipe->stream_res.tg->inst)) { - split[i] = 4; - v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_4to1; - } - /*420 format workaround*/ - if (pipe->stream->timing.h_addressable > 7680 && - pipe->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { - split[i] = 4; - } - - v->ODMCombineEnabled[pipe_plane] = - v->ODMCombineEnablePerState[vlevel][pipe_plane]; - - if (v->ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) { - if (dml_get_num_mpc_splits(pipe) == 1) { - /*If need split for mpc but 2 way split already*/ - if (split[i] == 4) - split[i] = 2; /* 2 -> 4 MPC */ - else if (split[i] == 2) - split[i] = 0; /* 2 -> 2 MPC */ - else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) - merge[i] = true; /* 2 -> 1 MPC */ - } else if (dml_get_num_mpc_splits(pipe) == 3) { - /*If need split for mpc but 4 way split already*/ - if (split[i] == 2 && ((pipe->top_pipe && !pipe->top_pipe->top_pipe) - || !pipe->bottom_pipe)) { - merge[i] = true; /* 4 -> 2 MPC */ - } else if (split[i] == 0 && pipe->top_pipe && - pipe->top_pipe->plane_state == pipe->plane_state) - merge[i] = true; /* 4 -> 1 MPC */ - split[i] = 0; - } else if (dml_get_num_mpc_splits(pipe)) { - /* ODM -> MPC transition */ - if (pipe->prev_odm_pipe) { - split[i] = 0; - merge[i] = true; - } - } - } else { - if (dml_get_num_mpc_splits(pipe) == 1) { - /*If need split for odm but 2 way split already*/ - if (split[i] == 4) - split[i] = 2; /* 2 -> 4 ODM */ - else if (split[i] == 2) - split[i] = 0; /* 2 -> 2 ODM */ - else if (pipe->prev_odm_pipe) { - ASSERT(0); /* NOT expected yet */ - merge[i] = true; /* exit ODM */ - } - } else if (dml_get_num_mpc_splits(pipe) == 3) { - /*If need split for odm but 4 way split already*/ - if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe) - || !pipe->next_odm_pipe)) { - ASSERT(0); /* NOT expected yet */ - merge[i] = true; /* 4 -> 2 ODM */ - } else if (split[i] == 0 && pipe->prev_odm_pipe) { - ASSERT(0); /* NOT expected yet */ - merge[i] = true; /* exit ODM */ - } - split[i] = 0; - } else if (dml_get_num_mpc_splits(pipe)) { - /* MPC -> ODM transition */ - ASSERT(0); /* NOT expected yet */ - if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - split[i] = 0; - merge[i] = true; - } - } - } - - /* Adjust dppclk when split is forced, do not bother with dispclk */ - if (split[i] != 0 && v->NoOfDPP[vlevel][max_mpc_comb][pipe_idx] == 1) - v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2; - pipe_idx++; - } - - return vlevel; -} - -static void dml_set_phantom_stream_timing(struct dc *dc, - struct dc_state *context, - struct pipe_ctx *ref_pipe, - struct dc_stream_state *phantom_stream) -{ - // phantom_vactive = blackout (latency + margin) + fw_processing_delays + pstate allow width - uint32_t phantom_vactive_us = 
context->bw_ctx.dml.soc.dram_clock_change_latency_us + 60 + - dc->caps.subvp_fw_processing_delay_us + - dc->caps.subvp_pstate_allow_width_us; - uint32_t phantom_vactive = ((double)phantom_vactive_us/1000000) * - (ref_pipe->stream->timing.pix_clk_100hz * 100) / - (double)ref_pipe->stream->timing.h_total; - uint32_t phantom_bp = ref_pipe->pipe_dlg_param.vstartup_start; - - phantom_stream->dst.y = 0; - phantom_stream->dst.height = phantom_vactive; - phantom_stream->src.y = 0; - phantom_stream->src.height = phantom_vactive; - - phantom_stream->timing.v_addressable = phantom_vactive; - phantom_stream->timing.v_front_porch = 1; - phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + - phantom_stream->timing.v_front_porch + - phantom_stream->timing.v_sync_width + - phantom_bp; -} - -static struct dc_stream_state *dml_enable_phantom_stream(struct dc *dc, - struct dc_state *context, - struct pipe_ctx *ref_pipe) -{ - struct dc_stream_state *phantom_stream = NULL; - - phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink); - phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; - phantom_stream->dpms_off = true; - phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; - phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream; - ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN; - ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream; - - /* stream has limited viewport and small timing */ - memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); - memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); - memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); - dml_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream); - - dc_add_stream_to_ctx(dc, context, phantom_stream); - dc->hwss.apply_ctx_to_hw(dc, context); - return phantom_stream; -} - -static void dml_enable_phantom_plane(struct dc *dc, - struct dc_state *context, - struct dc_stream_state *phantom_stream, - struct pipe_ctx *main_pipe) -{ - struct dc_plane_state *phantom_plane = NULL; - struct dc_plane_state *prev_phantom_plane = NULL; - struct pipe_ctx *curr_pipe = main_pipe; - - while (curr_pipe) { - if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) - phantom_plane = prev_phantom_plane; - else - phantom_plane = dc_create_plane_state(dc); - - memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); - memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, - sizeof(phantom_plane->scaling_quality)); - memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect)); - memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect)); - memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect)); - memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size, - sizeof(phantom_plane->plane_size)); - memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info, - sizeof(phantom_plane->tiling_info)); - memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc)); - /* Currently compat_level is undefined in dc_state - * phantom_plane->compat_level = curr_pipe->plane_state->compat_level; - */ - phantom_plane->format = curr_pipe->plane_state->format; - phantom_plane->rotation = curr_pipe->plane_state->rotation; - phantom_plane->visible = 
curr_pipe->plane_state->visible; - - /* Shadow pipe has small viewport. */ - phantom_plane->clip_rect.y = 0; - phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable; - - dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context); - - curr_pipe = curr_pipe->bottom_pipe; - prev_phantom_plane = phantom_plane; - } -} - -static void dml_add_phantom_pipes(struct dc *dc, struct dc_state *context) -{ - int i = 0; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct dc_stream_state *ref_stream = pipe->stream; - // Only construct phantom stream for top pipes that have plane enabled - if (!pipe->top_pipe && pipe->plane_state && pipe->stream && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - struct dc_stream_state *phantom_stream = NULL; - - phantom_stream = dml_enable_phantom_stream(dc, context, pipe); - dml_enable_phantom_plane(dc, context, phantom_stream, pipe); - } - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state && pipe->stream && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - pipe->stream->use_dynamic_meta = false; - pipe->plane_state->flip_immediate = false; - if (!resource_build_scaling_params(pipe)) { - // Log / remove phantom pipes since failed to build scaling params - } - } - } -} - -static void dml_remove_phantom_pipes(struct dc *dc, struct dc_state *context) -{ - int i; - bool removed_pipe = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - // build scaling params for phantom pipes - if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - dc_rem_all_planes_for_stream(dc, pipe->stream, context); - dc_remove_stream_from_ctx(dc, context, pipe->stream); - removed_pipe = true; - } - - // Clear all phantom stream info - if (pipe->stream) { - pipe->stream->mall_stream_config.type = SUBVP_NONE; - pipe->stream->mall_stream_config.paired_stream = NULL; - } - } - if (removed_pipe) - dc->hwss.apply_ctx_to_hw(dc, context); -} - -/* - * If the input state contains no upstream planes for a particular pipe (i.e. 
only timing) - * we need to populate some "conservative" plane information as DML cannot handle "no planes" - */ -static void populate_default_plane_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_pipe_params_st *pipe) -{ - pipe->src.is_hsplit = pipe->dest.odm_combine != dm_odm_combine_mode_disabled; - pipe->src.source_scan = dm_horz; - pipe->src.sw_mode = dm_sw_4kb_s; - pipe->src.macro_tile_size = dm_64k_tile; - pipe->src.viewport_width = timing->h_addressable; - if (pipe->src.viewport_width > 1920) - pipe->src.viewport_width = 1920; - pipe->src.viewport_height = timing->v_addressable; - if (pipe->src.viewport_height > 1080) - pipe->src.viewport_height = 1080; - pipe->src.surface_height_y = pipe->src.viewport_height; - pipe->src.surface_width_y = pipe->src.viewport_width; - pipe->src.surface_height_c = pipe->src.viewport_height; - pipe->src.surface_width_c = pipe->src.viewport_width; - pipe->src.data_pitch = ((pipe->src.viewport_width + 255) / 256) * 256; - pipe->src.source_format = dm_444_32; - pipe->dest.recout_width = pipe->src.viewport_width; - pipe->dest.recout_height = pipe->src.viewport_height; - pipe->dest.full_recout_width = pipe->dest.recout_width; - pipe->dest.full_recout_height = pipe->dest.recout_height; - pipe->scale_ratio_depth.lb_depth = dm_lb_16; - pipe->scale_ratio_depth.hscl_ratio = 1.0; - pipe->scale_ratio_depth.vscl_ratio = 1.0; - pipe->scale_ratio_depth.scl_enable = 0; - pipe->scale_taps.htaps = 1; - pipe->scale_taps.vtaps = 1; - pipe->dest.vtotal_min = timing->v_total; - pipe->dest.vtotal_max = timing->v_total; - - if (pipe->dest.odm_combine == dm_odm_combine_mode_2to1) { - pipe->src.viewport_width /= 2; - pipe->dest.recout_width /= 2; - } else if (pipe->dest.odm_combine == dm_odm_combine_mode_4to1) { - pipe->src.viewport_width /= 4; - pipe->dest.recout_width /= 4; - } - - pipe->src.dcc = false; - pipe->src.dcc_rate = 1; -} - -/* - * If the pipe is not blending (i.e. 
pipe_ctx->top pipe == null) then its - * hsplit group is equal to its own pipe ID - * Otherwise, all pipes part of the same blending tree have the same hsplit group - * ID as the top most pipe - * - * If the pipe ctx is ODM combined, then similar logic follows - */ -static void populate_hsplit_group_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe) -{ - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; - - if (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state - == dc_pipe_ctx->plane_state) { - struct pipe_ctx *first_pipe = dc_pipe_ctx->top_pipe; - int split_idx = 0; - - while (first_pipe->top_pipe && first_pipe->top_pipe->plane_state - == dc_pipe_ctx->plane_state) { - first_pipe = first_pipe->top_pipe; - split_idx++; - } - - /* Treat 4to1 mpc combine as an mpo of 2 2-to-1 combines */ - if (split_idx == 0) - e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx; - else if (split_idx == 1) - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; - else if (split_idx == 2) - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->top_pipe->pipe_idx; - - } else if (dc_pipe_ctx->prev_odm_pipe) { - struct pipe_ctx *first_pipe = dc_pipe_ctx->prev_odm_pipe; - - while (first_pipe->prev_odm_pipe) - first_pipe = first_pipe->prev_odm_pipe; - e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx; - } -} - -static void populate_dml_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe, int always_scale) -{ - const struct dc_plane_state *pln = dc_pipe_ctx->plane_state; - const struct scaler_data *scl = &dc_pipe_ctx->plane_res.scl_data; - - e2e_pipe->pipe.src.immediate_flip = pln->flip_immediate; - e2e_pipe->pipe.src.is_hsplit = (dc_pipe_ctx->bottom_pipe && dc_pipe_ctx->bottom_pipe->plane_state == pln) - || (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state == pln) - || e2e_pipe->pipe.dest.odm_combine != dm_odm_combine_mode_disabled; - - /* stereo is not split */ - if (pln->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE || - pln->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) { - e2e_pipe->pipe.src.is_hsplit = false; - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; - } - - e2e_pipe->pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90 - || pln->rotation == ROTATION_ANGLE_270 ? 
dm_vert : dm_horz; - e2e_pipe->pipe.src.viewport_y_y = scl->viewport.y; - e2e_pipe->pipe.src.viewport_y_c = scl->viewport_c.y; - e2e_pipe->pipe.src.viewport_width = scl->viewport.width; - e2e_pipe->pipe.src.viewport_width_c = scl->viewport_c.width; - e2e_pipe->pipe.src.viewport_height = scl->viewport.height; - e2e_pipe->pipe.src.viewport_height_c = scl->viewport_c.height; - e2e_pipe->pipe.src.viewport_width_max = pln->src_rect.width; - e2e_pipe->pipe.src.viewport_height_max = pln->src_rect.height; - e2e_pipe->pipe.src.surface_width_y = pln->plane_size.surface_size.width; - e2e_pipe->pipe.src.surface_height_y = pln->plane_size.surface_size.height; - e2e_pipe->pipe.src.surface_width_c = pln->plane_size.chroma_size.width; - e2e_pipe->pipe.src.surface_height_c = pln->plane_size.chroma_size.height; - - if (pln->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA - || pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { - e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch; - e2e_pipe->pipe.src.data_pitch_c = pln->plane_size.chroma_pitch; - e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch; - e2e_pipe->pipe.src.meta_pitch_c = pln->dcc.meta_pitch_c; - } else { - e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch; - e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch; - } - e2e_pipe->pipe.src.dcc = pln->dcc.enable; - e2e_pipe->pipe.src.dcc_rate = 1; - e2e_pipe->pipe.dest.recout_width = scl->recout.width; - e2e_pipe->pipe.dest.recout_height = scl->recout.height; - e2e_pipe->pipe.dest.full_recout_height = scl->recout.height; - e2e_pipe->pipe.dest.full_recout_width = scl->recout.width; - if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_2to1) - e2e_pipe->pipe.dest.full_recout_width *= 2; - else if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_4to1) - e2e_pipe->pipe.dest.full_recout_width *= 4; - else { - struct pipe_ctx *split_pipe = dc_pipe_ctx->bottom_pipe; - - while (split_pipe && split_pipe->plane_state == pln) { - e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; - split_pipe = split_pipe->bottom_pipe; - } - split_pipe = dc_pipe_ctx->top_pipe; - while (split_pipe && split_pipe->plane_state == pln) { - e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; - split_pipe = split_pipe->top_pipe; - } - } - - e2e_pipe->pipe.scale_ratio_depth.lb_depth = dm_lb_16; - e2e_pipe->pipe.scale_ratio_depth.hscl_ratio = (double) scl->ratios.horz.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.hscl_ratio_c = (double) scl->ratios.horz_c.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.vscl_ratio = (double) scl->ratios.vert.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.vscl_ratio_c = (double) scl->ratios.vert_c.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.scl_enable = - scl->ratios.vert.value != dc_fixpt_one.value - || scl->ratios.horz.value != dc_fixpt_one.value - || scl->ratios.vert_c.value != dc_fixpt_one.value - || scl->ratios.horz_c.value != dc_fixpt_one.value /*Lb only or Full scl*/ - || always_scale; /*support always scale*/ - e2e_pipe->pipe.scale_taps.htaps = scl->taps.h_taps; - e2e_pipe->pipe.scale_taps.htaps_c = scl->taps.h_taps_c; - e2e_pipe->pipe.scale_taps.vtaps = scl->taps.v_taps; - e2e_pipe->pipe.scale_taps.vtaps_c = scl->taps.v_taps_c; - - /* Currently compat_level is not defined. 
Commenting it until further resolution - * if (pln->compat_level == DC_LEGACY_TILING_ADDR_GEN_TWO) { - swizzle_to_dml_params(pln->tiling_info.gfx9.swizzle, - &e2e_pipe->pipe.src.sw_mode); - e2e_pipe->pipe.src.macro_tile_size = - swizzle_mode_to_macro_tile_size(pln->tiling_info.gfx9.swizzle); - } else { - gfx10array_mode_to_dml_params(pln->tiling_info.gfx10compatible.array_mode, - pln->compat_level, - &e2e_pipe->pipe.src.sw_mode); - e2e_pipe->pipe.src.macro_tile_size = dm_4k_tile; - }*/ - - e2e_pipe->pipe.src.source_format = dc_source_format_to_dml_source_format(pln->format); -} - -static void populate_dml_cursor_parameters_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe) -{ - /* - * For graphic plane, cursor number is 1, nv12 is 0 - * bw calculations due to cursor on/off - */ - if (dc_pipe_ctx->plane_state && - (dc_pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || - dc_pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM)) - e2e_pipe->pipe.src.num_cursors = 0; - else - e2e_pipe->pipe.src.num_cursors = 1; - - e2e_pipe->pipe.src.cur0_src_width = 256; - e2e_pipe->pipe.src.cur0_bpp = dm_cur_32bit; -} - -static int populate_dml_pipes_from_context_base( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int pipe_cnt, i; - bool synchronized_vblank = true; - struct resource_context *res_ctx = &context->res_ctx; - - for (i = 0, pipe_cnt = -1; i < dc->res_pool->pipe_count; i++) { - if (!res_ctx->pipe_ctx[i].stream) - continue; - - if (pipe_cnt < 0) { - pipe_cnt = i; - continue; - } - - if (res_ctx->pipe_ctx[pipe_cnt].stream == res_ctx->pipe_ctx[i].stream) - continue; - - if (dc->debug.disable_timing_sync || - (!resource_are_streams_timing_synchronizable( - res_ctx->pipe_ctx[pipe_cnt].stream, - res_ctx->pipe_ctx[i].stream) && - !resource_are_vblanks_synchronizable( - res_ctx->pipe_ctx[pipe_cnt].stream, - res_ctx->pipe_ctx[i].stream))) { - synchronized_vblank = false; - break; - } - } - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing; - - struct audio_check aud_check = {0}; - if (!res_ctx->pipe_ctx[i].stream) - continue; - - /* todo: - pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0; - pipes[pipe_cnt].pipe.src.dcc = 0; - pipes[pipe_cnt].pipe.src.vm = 0;*/ - - pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; - - pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC; - /* todo: rotation?*/ - pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h; - if (res_ctx->pipe_ctx[i].stream->use_dynamic_meta) { - pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = true; - /* 1/2 vblank */ - pipes[pipe_cnt].pipe.src.dynamic_metadata_lines_before_active = - (timing->v_total - timing->v_addressable - - timing->v_border_top - timing->v_border_bottom) / 2; - /* 36 bytes dp, 32 hdmi */ - pipes[pipe_cnt].pipe.src.dynamic_metadata_xmit_bytes = - dc_is_dp_signal(res_ctx->pipe_ctx[i].stream->signal) ? 
36 : 32; - } - pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank; - - dc_timing_to_dml_timing(timing, &pipes[pipe_cnt].pipe.dest); - pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min; - pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max; - - pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst; - - pipes[pipe_cnt].pipe.dest.odm_combine = get_dml_odm_combine(&res_ctx->pipe_ctx[i]); - - populate_hsplit_group_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]); - - pipes[pipe_cnt].dout.dp_lanes = 4; - pipes[pipe_cnt].dout.is_virtual = 0; - pipes[pipe_cnt].dout.output_type = get_dml_output_type(res_ctx->pipe_ctx[i].stream->signal); - if (pipes[pipe_cnt].dout.output_type < 0) { - pipes[pipe_cnt].dout.output_type = dm_dp; - pipes[pipe_cnt].dout.is_virtual = 1; - } - - populate_color_depth_and_encoding_from_timing(&res_ctx->pipe_ctx[i].stream->timing, &pipes[pipe_cnt].dout); - - if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC) - pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0; - - /* todo: default max for now, until there is logic reflecting this in dc*/ - pipes[pipe_cnt].dout.dsc_input_bpc = 12; - /*fill up the audio sample rate (unit in kHz)*/ - get_audio_check(&res_ctx->pipe_ctx[i].stream->audio_info, &aud_check); - pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate / 1000; - - populate_dml_cursor_parameters_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]); - - if (!res_ctx->pipe_ctx[i].plane_state) { - populate_default_plane_from_timing(timing, &pipes[pipe_cnt].pipe); - } else { - populate_dml_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt], dc->debug.always_scale); - } - - pipe_cnt++; - } - - /* populate writeback information */ - if (dc->res_pool) - dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes); - - return pipe_cnt; -} - -static int dml_populate_dml_pipes_from_context( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int i, pipe_cnt; - struct resource_context *res_ctx = &context->res_ctx; - struct pipe_ctx *pipe = NULL; // Fix potentially uninitialized error from VS - - populate_dml_pipes_from_context_base(dc, context, pipes, fast_validate); - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing; - - if (!res_ctx->pipe_ctx[i].stream) - continue; - pipe = &res_ctx->pipe_ctx[i]; - timing = &pipe->stream->timing; - - pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; - pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - - pipes[pipe_cnt].dout.dsc_input_bpc = 0; - if (pipes[pipe_cnt].dout.dsc_enable) { - switch (timing->display_color_depth) { - case COLOR_DEPTH_888: - pipes[pipe_cnt].dout.dsc_input_bpc = 8; - break; - case COLOR_DEPTH_101010: - pipes[pipe_cnt].dout.dsc_input_bpc = 10; - break; - case COLOR_DEPTH_121212: - pipes[pipe_cnt].dout.dsc_input_bpc = 12; - break; - default: - ASSERT(0); - break; - } - } - pipe_cnt++; - } - dc->config.enable_4to1MPC = false; - if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { - if (is_dual_plane(pipe->plane_state->format) - && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { - dc->config.enable_4to1MPC = true; - } 
else if (!is_dual_plane(pipe->plane_state->format)) { - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; - pipes[0].pipe.src.unbounded_req_mode = true; - } - } - - return pipe_cnt; -} - -static void dml_full_validate_bw_helper(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *vlevel, - int *split, - bool *merge, - int *pipe_cnt) -{ - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - /* - * DML favors voltage over p-state, but we're more interested in - * supporting p-state over voltage. We can't support p-state in - * prefetch mode > 0 so try capping the prefetch mode to start. - */ - context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = - dm_allow_self_refresh_and_mclk_switch; - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - /* This may adjust vlevel and maxMpcComb */ - if (*vlevel < context->bw_ctx.dml.soc.num_states) - *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - - /* Conditions for setting up phantom pipes for SubVP: - * 1. Not force disable SubVP - * 2. Full update (i.e. !fast_validate) - * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) - * 4. Display configuration passes validation - * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) - */ - if (!dc->debug.force_disable_subvp && - dml_enough_pipes_for_subvp(dc, context) && - *vlevel < context->bw_ctx.dml.soc.num_states && - (vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || - dc->debug.force_subvp_mclk_switch)) { - - dml_add_phantom_pipes(dc, context); - - /* Create input to DML based on new context which includes phantom pipes - * TODO: Input to DML should mark which pipes are phantom - */ - *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false); - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - if (*vlevel < context->bw_ctx.dml.soc.num_states) { - memset(split, 0, MAX_PIPES * sizeof(*split)); - memset(merge, 0, MAX_PIPES * sizeof(*merge)); - *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - } - - // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) - // remove phantom pipes and repopulate dml pipes - if (*vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - dml_remove_phantom_pipes(dc, context); - *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false); - } - } -} - -static void dcn20_adjust_adaptive_sync_v_startup( - const struct dc_crtc_timing *dc_crtc_timing, int *vstartup_start) -{ - struct dc_crtc_timing patched_crtc_timing; - uint32_t asic_blank_end = 0; - uint32_t asic_blank_start = 0; - uint32_t newVstartup = 0; - - patched_crtc_timing = *dc_crtc_timing; - - if (patched_crtc_timing.flags.INTERLACE == 1) { - if (patched_crtc_timing.v_front_porch < 2) - patched_crtc_timing.v_front_porch = 2; - } else { - if (patched_crtc_timing.v_front_porch < 1) - patched_crtc_timing.v_front_porch = 1; - } - - /* blank_start = frame end - front porch */ - asic_blank_start = patched_crtc_timing.v_total - - patched_crtc_timing.v_front_porch; - - /* blank_end = blank_start - active */ - asic_blank_end = asic_blank_start - - patched_crtc_timing.v_border_bottom - - patched_crtc_timing.v_addressable - - patched_crtc_timing.v_border_top; - - newVstartup = 
asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start); - - *vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start); -} - -static bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx) -{ - return (pipe_ctx->stream_res.hpo_dp_stream_enc && - pipe_ctx->link_res.hpo_dp_link_enc && - dc_is_dp_signal(pipe_ctx->stream->signal)); -} - -static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) -{ - int i; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) - return true; - } - return false; -} - -static void dml_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) -{ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; - } -} - -static bool dml_internal_validate( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *vlevel_out, - bool fast_validate) -{ - bool out = false; - bool repopulate_pipes = false; - int split[MAX_PIPES] = { 0 }; - bool merge[MAX_PIPES] = { false }; - bool newly_split[MAX_PIPES] = { false }; - int pipe_cnt, i, pipe_idx, vlevel; - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - ASSERT(pipes); - if (!pipes) - return false; - - // For each full update, remove all existing phantom pipes first - dml_remove_phantom_pipes(dc, context); - - dml_update_soc_for_wm_a(dc, context); - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state) { - // On initial pass through DML, we intend to use MALL for SS on all - // (non-PSR) surfaces with none using MALL for P-State - // 'mall_plane_config': is not a member of 'dc_plane_state' - commenting it out till mall_plane_config gets supported in dc_plant_state - //if (pipe->stream && pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) - // pipe->plane_state->mall_plane_config.use_mall_for_ss = true; - } - } - pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - - if (!pipe_cnt) { - out = true; - goto validate_out; - } - - dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (!fast_validate) { - dml_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); - } - - if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - /* - * If mode is unsupported or there's still no p-state support then - * fall back to favoring voltage. - * - * We don't actually support prefetch mode 2, so require that we - * at least support prefetch mode 1. 
- */ - context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = - dm_allow_self_refresh; - - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - if (vlevel < context->bw_ctx.dml.soc.num_states) { - memset(split, 0, sizeof(split)); - memset(merge, 0, sizeof(merge)); - vlevel = dml_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); - } - } - - dml_log_mode_support_params(&context->bw_ctx.dml); - - if (vlevel == context->bw_ctx.dml.soc.num_states) - goto validate_fail; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; - - if (!pipe->stream) - continue; - - /* We only support full screen mpo with ODM */ - if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled - && pipe->plane_state && mpo_pipe - && memcmp(&mpo_pipe->plane_res.scl_data.recout, - &pipe->plane_res.scl_data.recout, - sizeof(struct rect)) != 0) { - ASSERT(mpo_pipe->plane_state != pipe->plane_state); - goto validate_fail; - } - pipe_idx++; - } - - /* merge pipes if necessary */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - /*skip pipes that don't need merging*/ - if (!merge[i]) - continue; - - /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ - if (pipe->prev_odm_pipe) { - /*split off odm pipe*/ - pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; - if (pipe->next_odm_pipe) - pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; - - pipe->bottom_pipe = NULL; - pipe->next_odm_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - pipe->top_pipe = NULL; - pipe->prev_odm_pipe = NULL; - if (pipe->stream_res.dsc) - dml_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - struct pipe_ctx *top_pipe = pipe->top_pipe; - struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; - - top_pipe->bottom_pipe = bottom_pipe; - if (bottom_pipe) - bottom_pipe->top_pipe = top_pipe; - - pipe->top_pipe = NULL; - pipe->bottom_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else - ASSERT(0); /* Should never try to merge master pipe */ - - } - - for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - struct pipe_ctx *hsplit_pipe = NULL; - bool odm; - int old_index = -1; - - if (!pipe->stream || newly_split[i]) - continue; - - pipe_idx++; - odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; - - if (!pipe->plane_state && !odm) - continue; - - if (split[i]) { - if (odm) { - if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - } else { - if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - 
old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else if (old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - } - hsplit_pipe = dml_find_split_pipe(dc, context, old_index); - ASSERT(hsplit_pipe); - if (!hsplit_pipe) - goto validate_fail; - - if (!dml_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, hsplit_pipe, odm)) - goto validate_fail; - - newly_split[hsplit_pipe->pipe_idx] = true; - repopulate_pipes = true; - } - if (split[i] == 4) { - struct pipe_ctx *pipe_4to1; - - if (odm && old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dml_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dml_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - - if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe - && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dml_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dml_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - hsplit_pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - } - if (odm) - dml_build_mapped_resource(dc, context, pipe->stream); - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state) { - if (!resource_build_scaling_params(pipe)) - goto validate_fail; - } - } - - /* Actual dsc count per stream dsc validation*/ - if (!dml_validate_dsc(dc, context)) { - vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; - goto validate_fail; - } - - if (repopulate_pipes) - pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - *vlevel_out = vlevel; - *pipe_cnt_out = pipe_cnt; - - out = true; - goto validate_out; - -validate_fail: - out = false; - -validate_out: - return out; -} - -static void dml_calculate_dlg_params( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx; - int plane_count; - - /* Writeback MCIF_WB arbitration parameters */ - if (dc->res_pool) - dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); - - context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; - context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; - context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; - context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; - context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; - context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 
1000; - context->bw_ctx.bw.dcn.clk.p_state_change_support = - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] - != dm_dram_clock_change_unsupported; - - context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - /* 'z9_support': is not a member of 'dc_clocks' - Commenting out till we have this support in dc_clocks - * context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ? - DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW; - */ - plane_count = 0; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].plane_state) - plane_count++; - } - - /* Commented out as per above error for now. - if (plane_count == 0) - context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW; - */ - context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); - context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = - context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) - context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { - // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; - context->res_ctx.pipe_ctx[i].unbounded_req = false; - } else { - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes; - context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode; - } - - if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = - pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; - pipe_idx++; - } - /*save a original dppclock copy*/ - context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; - context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; - context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz * 1000; - context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz * 1000; - context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - - context->bw_ctx.dml.ip.det_buffer_size_kbytes * pipe_idx; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - bool cstate_en = context->bw_ctx.dml.vba.PrefetchMode[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != 2; - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg(&context->bw_ctx.dml, - &context->res_ctx.pipe_ctx[i].dlg_regs, - 
&context->res_ctx.pipe_ctx[i].ttu_regs, - pipes, - pipe_cnt, - pipe_idx, - cstate_en, - context->bw_ctx.bw.dcn.clk.p_state_change_support, - false, false, true); - - context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml, - &context->res_ctx.pipe_ctx[i].rq_regs, - &pipes[pipe_idx].pipe); - pipe_idx++; - } -} - -static void dml_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx, vlevel_temp = 0; - - double dcfclk = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; - double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != - dm_dram_clock_change_unsupported; - - /* Set B: - * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, - * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark - * calculations to cover bootup clocks. - * DCFCLK: soc.clock_limits[2] when available - * UCLK: soc.clock_limits[2] when available - */ - if (context->bw_ctx.dml.soc.num_states > 2) { - vlevel_temp = 2; - dcfclk = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - //context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ - 
//context->bw_ctx.bw.dcn.watermarks.b.usr_retraining = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 8; - - /* Set D: - * All clocks min. - * DCFCLK: Min, as reported by PM FW when available - * UCLK : Min, as reported by PM FW when available - * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) - */ - - if (context->bw_ctx.dml.soc.num_states > 2) { - vlevel_temp = 0; - dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - //context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ - //context->bw_ctx.bw.dcn.watermarks.d.usr_retraining = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 8; - /* Set C, for Dummy P-State: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW, when available - * UCLK : Min, as reported by PM FW, when available - * pstate latency as per UCLK state dummy pstate latency - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - unsigned int min_dram_speed_mts_margin = 160; - - if ((!pstate_en)) - min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; - - /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ - for (i = 3; i > 0; i--) - if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) - break; - - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.dummy_pstate_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - //context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ - //context->bw_ctx.bw.dcn.watermarks.c.usr_retraining = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 8; - if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { - /* The only difference between A and C is p-state latency, if p-state is not supported - * with full p-state latency we want to calculate DLG based on dummy p-state latency, - * Set A p-state watermark set to 0 previously, when p-state unsupported, for now keep as previous implementation. - */ - context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; - } else { - /* Set A: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW, when available - * UCLK: Min, as reported by PM FW, when available - */ - dml_update_soc_for_wm_a(dc, context); - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - } - - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); - pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - if (dc->config.forced_clocks) { - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; - } - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; - - pipe_idx++; - } - - context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; - - dml_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); - - if (!pstate_en) - /* Restore full p-state latency */ - context->bw_ctx.dml.soc.dram_clock_change_latency_us = - dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; -} - -bool dml_validate(struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool out = false; - - BW_VAL_TRACE_SETUP(); - - int vlevel = 0; - int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = context->bw_ctx.dml.dml_pipe_state; - DC_LOGGER_INIT(dc->ctx->logger); - - BW_VAL_TRACE_COUNT(); - - out = dml_internal_validate(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); - - if (pipe_cnt == 0) - goto validate_out; - - if (!out) - goto validate_fail; - - BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - - if (fast_validate) { - BW_VAL_TRACE_SKIP(fast); - goto validate_out; - } - - dml_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); - - BW_VAL_TRACE_END_WATERMARKS(); - - goto validate_out; - -validate_fail: - DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", - 
dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); - - BW_VAL_TRACE_SKIP(fail); - out = false; - -validate_out: - BW_VAL_TRACE_FINISH(); - - return out; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c deleted file mode 100644 index 4ec5310a2962d..0000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifdef DML_WRAPPER_TRANSLATION_ - -static void gfx10array_mode_to_dml_params( - enum array_mode_values array_mode, - enum legacy_tiling_compat_level compat_level, - unsigned int *sw_mode) -{ - switch (array_mode) { - case DC_ARRAY_LINEAR_ALLIGNED: - case DC_ARRAY_LINEAR_GENERAL: - *sw_mode = dm_sw_linear; - break; - case DC_ARRAY_2D_TILED_THIN1: -// DC_LEGACY_TILING_ADDR_GEN_ZERO - undefined as per current code hence removed -#if 0 - if (compat_level == DC_LEGACY_TILING_ADDR_GEN_ZERO) - *sw_mode = dm_sw_gfx7_2d_thin_l_vp; - else - *sw_mode = dm_sw_gfx7_2d_thin_gl; -#endif - break; - default: - ASSERT(0); /* Not supported */ - break; - } -} - -static void swizzle_to_dml_params( - enum swizzle_mode_values swizzle, - unsigned int *sw_mode) -{ - switch (swizzle) { - case DC_SW_LINEAR: - *sw_mode = dm_sw_linear; - break; - case DC_SW_4KB_S: - *sw_mode = dm_sw_4kb_s; - break; - case DC_SW_4KB_S_X: - *sw_mode = dm_sw_4kb_s_x; - break; - case DC_SW_4KB_D: - *sw_mode = dm_sw_4kb_d; - break; - case DC_SW_4KB_D_X: - *sw_mode = dm_sw_4kb_d_x; - break; - case DC_SW_64KB_S: - *sw_mode = dm_sw_64kb_s; - break; - case DC_SW_64KB_S_X: - *sw_mode = dm_sw_64kb_s_x; - break; - case DC_SW_64KB_S_T: - *sw_mode = dm_sw_64kb_s_t; - break; - case DC_SW_64KB_D: - *sw_mode = dm_sw_64kb_d; - break; - case DC_SW_64KB_D_X: - *sw_mode = dm_sw_64kb_d_x; - break; - case DC_SW_64KB_D_T: - *sw_mode = dm_sw_64kb_d_t; - break; - case DC_SW_64KB_R_X: - *sw_mode = dm_sw_64kb_r_x; - break; - case DC_SW_VAR_S: - *sw_mode = dm_sw_var_s; - break; - case DC_SW_VAR_S_X: - *sw_mode = dm_sw_var_s_x; - break; - case DC_SW_VAR_D: - *sw_mode = dm_sw_var_d; - break; - case DC_SW_VAR_D_X: - *sw_mode = dm_sw_var_d_x; - break; - - default: - ASSERT(0); /* Not supported */ - break; - } -} - -static void dc_timing_to_dml_timing(const struct dc_crtc_timing *timing, struct 
_vcs_dpi_display_pipe_dest_params_st *dest) -{ - dest->hblank_start = timing->h_total - timing->h_front_porch; - dest->hblank_end = dest->hblank_start - - timing->h_addressable - - timing->h_border_left - - timing->h_border_right; - dest->vblank_start = timing->v_total - timing->v_front_porch; - dest->vblank_end = dest->vblank_start - - timing->v_addressable - - timing->v_border_top - - timing->v_border_bottom; - dest->htotal = timing->h_total; - dest->vtotal = timing->v_total; - dest->hactive = timing->h_addressable; - dest->vactive = timing->v_addressable; - dest->interlaced = timing->flags.INTERLACE; - dest->pixel_rate_mhz = timing->pix_clk_100hz/10000.0; - if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - dest->pixel_rate_mhz *= 2; -} - -static enum odm_combine_mode get_dml_odm_combine(const struct pipe_ctx *pipe) -{ - int odm_split_count = 0; - enum odm_combine_mode combine_mode = dm_odm_combine_mode_disabled; - struct pipe_ctx *next_pipe = pipe->next_odm_pipe; - - // Traverse pipe tree to determine odm split count - while (next_pipe) { - odm_split_count++; - next_pipe = next_pipe->next_odm_pipe; - } - pipe = pipe->prev_odm_pipe; - while (pipe) { - odm_split_count++; - pipe = pipe->prev_odm_pipe; - } - - // Translate split to DML odm combine factor - switch (odm_split_count) { - case 1: - combine_mode = dm_odm_combine_mode_2to1; - break; - case 3: - combine_mode = dm_odm_combine_mode_4to1; - break; - default: - combine_mode = dm_odm_combine_mode_disabled; - } - - return combine_mode; -} - -static int get_dml_output_type(enum signal_type dc_signal) -{ - int dml_output_type = -1; - - switch (dc_signal) { - case SIGNAL_TYPE_DISPLAY_PORT_MST: - case SIGNAL_TYPE_DISPLAY_PORT: - dml_output_type = dm_dp; - break; - case SIGNAL_TYPE_EDP: - dml_output_type = dm_edp; - break; - case SIGNAL_TYPE_HDMI_TYPE_A: - case SIGNAL_TYPE_DVI_SINGLE_LINK: - case SIGNAL_TYPE_DVI_DUAL_LINK: - dml_output_type = dm_hdmi; - break; - default: - break; - } - - return dml_output_type; -} - -static void populate_color_depth_and_encoding_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_output_params_st *dout) -{ - int output_bpc = 0; - - switch (timing->display_color_depth) { - case COLOR_DEPTH_666: - output_bpc = 6; - break; - case COLOR_DEPTH_888: - output_bpc = 8; - break; - case COLOR_DEPTH_101010: - output_bpc = 10; - break; - case COLOR_DEPTH_121212: - output_bpc = 12; - break; - case COLOR_DEPTH_141414: - output_bpc = 14; - break; - case COLOR_DEPTH_161616: - output_bpc = 16; - break; - case COLOR_DEPTH_999: - output_bpc = 9; - break; - case COLOR_DEPTH_111111: - output_bpc = 11; - break; - default: - output_bpc = 8; - break; - } - - switch (timing->pixel_encoding) { - case PIXEL_ENCODING_RGB: - case PIXEL_ENCODING_YCBCR444: - dout->output_format = dm_444; - dout->output_bpp = output_bpc * 3; - break; - case PIXEL_ENCODING_YCBCR420: - dout->output_format = dm_420; - dout->output_bpp = (output_bpc * 3.0) / 2; - break; - case PIXEL_ENCODING_YCBCR422: - if (timing->flags.DSC && !timing->dsc_cfg.ycbcr422_simple) - dout->output_format = dm_n422; - else - dout->output_format = dm_s422; - dout->output_bpp = output_bpc * 2; - break; - default: - dout->output_format = dm_444; - dout->output_bpp = output_bpc * 3; - } -} - -static enum source_format_class dc_source_format_to_dml_source_format(enum surface_pixel_format dc_format) -{ - enum source_format_class dml_format = dm_444_32; - - switch (dc_format) { - case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: - case 
SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: - dml_format = dm_420_8; - break; - case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: - case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: - dml_format = dm_420_10; - break; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: - dml_format = dm_444_64; - break; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: - case SURFACE_PIXEL_FORMAT_GRPH_RGB565: - dml_format = dm_444_16; - break; - case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: - dml_format = dm_444_8; - break; - case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: - dml_format = dm_rgbe_alpha; - break; - default: - dml_format = dm_444_32; - break; - } - - return dml_format; -} - -#endif From 443f93876601773e47332bb8f91819a4589e8b3c Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Tue, 9 Aug 2022 14:42:46 +0800 Subject: [PATCH 10/56] drm/amd/pm: update the smu driver interface version for SMU IP v13.0.4 The PMFW has changed the driver interface version, so keep the driver in sync with the firmware. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index c02e5e5767282..6fe2fe92ebd75 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -28,7 +28,7 @@ #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2C #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C From 0de184960610196d694096c55bcf76698f0fd25d Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Tue, 9 Aug 2022 17:46:39 +0800 Subject: [PATCH 11/56] drm/amdgpu: add MMHUB IP v3.0.1 Clock Gating support Add set/get_clockgating callbacks for MMHUB IP v3.0.1.
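Both callbacks boil down to a read-modify-write of MM_ATC_L2_MISC_CG. A minimal sketch of that shape (the function name below is illustrative; the actual implementation is in the diff that follows):

static void mmhub_v3_0_1_cg_rmw_sketch(struct amdgpu_device *adev, bool enable)
{
	uint32_t def, data;

	/* read the current control value and flip the enable bit as requested */
	def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);

	if (enable)
		data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
	else
		data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;

	/* write back only when the value actually changed */
	if (def != data)
		WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
}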
Signed-off-by: Tim Huang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c | 42 +++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index cac72ced94c85..e8058edc1d108 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -518,18 +518,41 @@ static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev) static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { - //TODO + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); } static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable) { - //TODO + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); } static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state) { + if (amdgpu_sriov_vf(adev)) + return 0; + mmhub_v3_0_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); mmhub_v3_0_1_update_medium_grain_light_sleep(adev, @@ -539,7 +562,20 @@ static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev, static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags) { - //TODO + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + /* AMD_CG_SUPPORT_MC_MGCG */ + if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; } const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = { From a86bcaa1ff085bd9ce3eb116eefacc2031abd205 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Tue, 9 Aug 2022 18:59:49 +0800 Subject: [PATCH 12/56] drm/amdgpu: add HDP IP v5.2.1 Clock Gating support Add set/get_clockgating for HDP IP v5.2.1. 
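The HDP memory power modes are mutually exclusive: at most one of SD, LS, or DS is enabled, with SD taking priority. A condensed sketch of the selection logic implemented in the diff below (function name illustrative; register and field names as in the patch):

static uint32_t hdp_v5_2_pick_mem_power_mode(struct amdgpu_device *adev,
					     uint32_t pwr_cntl)
{
	/* only one power mode (LS/DS/SD) can be enabled; SD first, then LS, then DS */
	if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD)
		pwr_cntl = REG_SET_FIELD(pwr_cntl, HDP_MEM_POWER_CTRL,
					 ATOMIC_MEM_POWER_SD_EN, 1);
	else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)
		pwr_cntl = REG_SET_FIELD(pwr_cntl, HDP_MEM_POWER_CTRL,
					 ATOMIC_MEM_POWER_LS_EN, 1);
	else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS)
		pwr_cntl = REG_SET_FIELD(pwr_cntl, HDP_MEM_POWER_CTRL,
					 ATOMIC_MEM_POWER_DS_EN, 1);

	return pwr_cntl;
}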
Signed-off-by: Tim Huang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c | 150 ++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index 39a696cd45b5e..29c3484ae1f16 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -40,6 +40,156 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, 0); } +static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + uint32_t hdp_mem_pwr_cntl; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD))) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + + /* Before doing clock/power mode switch, forced on MEM clock */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 1); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); + + /* disable clock and power gating before any changing */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 0); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* Already disabled above. 
The actions below are for "enabled" only */ + if (enable) { + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + } + + /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + } + } + + /* disable MEM clock override after clock/power mode changing */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 0); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + + if (enable) { + hdp_clk_cntl &= + ~(uint32_t) + (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK); + } else { + hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK; + } + + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t tmp; + + /* AMD_CG_SUPPORT_HDP_MGCG */ + tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK))) + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + + /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ + tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_DS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK) + *flags |= 
AMD_CG_SUPPORT_HDP_SD; +} + +static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + hdp_v5_2_update_mem_power_gating(adev, enable); + hdp_v5_2_update_medium_grain_clock_gating(adev, enable); +} + const struct amdgpu_hdp_funcs hdp_v5_2_funcs = { .flush_hdp = hdp_v5_2_flush_hdp, + .update_clock_gating = hdp_v5_2_update_clock_gating, + .get_clock_gating_state = hdp_v5_2_get_clockgating_state, }; From f0462e398108223c574a71cbdfe2f8a0634faa50 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Tue, 9 Aug 2022 16:26:05 +0800 Subject: [PATCH 13/56] drm/amdgpu: add ATHUB IP v3.0.1 Clock Gating support Add ATHUB IP v3.0.1 in athub_v3_0_set_clockgating. regATHUB_MISC_CNTL has a different offset for ATHUB IP v3.0.1, so IP version checking is needed to select the right register offset. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/athub_v3_0.c | 42 ++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c index 33a8a7365aef9..f0e235f98afb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c @@ -28,13 +28,44 @@ #include "navi10_enum.h" #include "soc15_common.h" +#define regATHUB_MISC_CNTL_V3_0_1 0x00d7 +#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0 + + +static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) +{ + uint32_t data; + + switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1); + break; + default: + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + break; + } + return data; +} + +static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) +{ + switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data); + break; + default: + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + break; + } +} + static void athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { uint32_t def, data; - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + def = data = athub_v3_0_get_cg_cntl(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG)) data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; @@ -42,7 +73,7 @@ athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; if (def != data) - WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + athub_v3_0_set_cg_cntl(adev, data); } static void @@ -51,7 +82,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, { uint32_t def, data; - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + def = data = athub_v3_0_get_cg_cntl(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS)) data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; @@ -59,7 +90,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; if (def != data) - WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + athub_v3_0_set_cg_cntl(adev, data); } int athub_v3_0_set_clockgating(struct amdgpu_device *adev, @@ -70,6 +101,7 @@ int athub_v3_0_set_clockgating(struct amdgpu_device *adev, switch (adev->ip_versions[ATHUB_HWIP][0]) { case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 1): case IP_VERSION(3, 0, 2): athub_v3_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -88,7 +120,7 @@ void athub_v3_0_get_clockgating(struct
amdgpu_device *adev, u64 *flags) int data; /* AMD_CG_SUPPORT_ATHUB_MGCG */ - data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + data = athub_v3_0_get_cg_cntl(adev); if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; From d5c1e07cea09030e2489a32579c53a2315abf4c8 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Fri, 29 Jul 2022 15:13:03 +0800 Subject: [PATCH 14/56] drm/amdgpu: enable MMHUB IP v3.0.1 Clock Gating Enable AMD_CG_SUPPORT_MC_MGCG and AMD_CG_SUPPORT_MC_LS support. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 71e184ca60045..543cf40adf8e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -596,6 +596,8 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_REPEATER_FGCG | AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = From b0f733c1d4aecace3dbcac209bf32ffa866ba79d Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Fri, 29 Jul 2022 15:18:10 +0800 Subject: [PATCH 15/56] drm/amdgpu: enable HDP IP v5.2.1 Clock Gating Enable AMD_CG_SUPPORT_HDP_MGCG and AMD_CG_SUPPORT_HDP_LS support. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 543cf40adf8e0..d9e5bae82e836 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -598,6 +598,8 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_PERF_CLK | AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = @@ -704,6 +706,10 @@ static int soc21_common_set_clockgating_state(void *handle, adev->hdp.funcs->update_clock_gating(adev, state == AMD_CG_STATE_GATE); break; + case IP_VERSION(7, 7, 0): + adev->hdp.funcs->update_clock_gating(adev, + state == AMD_CG_STATE_GATE); + break; default: break; } From 8fbab5d8a48a4824be91261b2a1478388044ffb2 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Fri, 29 Jul 2022 15:21:08 +0800 Subject: [PATCH 16/56] drm/amdgpu: enable ATHUB IP v3.0.1 Clock Gating Enable AMD_CG_SUPPORT_ATHUB_MGCG and AMD_CG_SUPPORT_ATHUB_LS support. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index d9e5bae82e836..6c3440e7ed3f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -600,6 +600,8 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_HDP_MGCG | AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = From d4215cfc166434cc5010fe333e5da2925257d168 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 10 Aug 2022 22:09:36 +0800 Subject: [PATCH 17/56] drm/amdkfd: reserve 2 queues for sdma 6.0.1 in bitmap There is only one engine in sdma 6.0.1, the total number of reserved queues should be 2, reflect this number in bitmap as well. 
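To make the bitmap encoding concrete, here is a small standalone C sketch (illustrative only, not driver code; the helper name and the userspace framing are invented for the example) that derives the reserved-queue bitmap from the engine count, matching the 0xF value used for the two-engine parts and the 0x3 value used for SDMA 6.0.1:

	#include <stdio.h>

	/* Bits interleave engines first: BIT(q * num_engines + e) marks
	 * queue q on engine e, mirroring the comments in kfd_device.c. */
	static unsigned long reserved_sdma_bitmap(int num_engines, int queues_per_engine)
	{
		unsigned long bitmap = 0;
		int e, q;

		for (q = 0; q < queues_per_engine; q++)
			for (e = 0; e < num_engines; e++)
				bitmap |= 1UL << (q * num_engines + e);
		return bitmap;
	}

	int main(void)
	{
		printf("2 engines: 0x%lx\n", reserved_sdma_bitmap(2, 2)); /* 0xf */
		printf("1 engine:  0x%lx\n", reserved_sdma_bitmap(1, 2)); /* 0x3 */
		return 0;
	}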
Signed-off-by: Yifan Zhang Reviewed-by: Tim Huang --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 07b9fe4016a60..f8545650c5900 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -102,13 +102,18 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) switch (sdma_version) { case IP_VERSION(6, 0, 0): - case IP_VERSION(6, 0, 1): case IP_VERSION(6, 0, 2): /* Reserve 1 for paging and 1 for gfx */ kfd->device_info.num_reserved_sdma_queues_per_engine = 2; /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */ kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL; break; + case IP_VERSION(6, 0, 1): + /* Reserve 1 for paging and 1 for gfx */ + kfd->device_info.num_reserved_sdma_queues_per_engine = 2; + /* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... */ + kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL; + break; default: break; } From 8fb6496947a2d3f17280cc32f8cad3fdb325bf03 Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Tue, 16 Aug 2022 16:50:39 +0800 Subject: [PATCH 18/56] drm/amdkcl: fix ttm debugfs dir name Signed-off-by: Flora Cui Reviewed-by: Leslie Shi --- drivers/gpu/drm/ttm/ttm_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 22dcdef082cc5..1197ecfc4e8c8 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -80,7 +80,7 @@ static int ttm_global_init(void) si_meminfo(&si); - ttm_debugfs_root = debugfs_create_dir("ttm", NULL); + ttm_debugfs_root = debugfs_create_dir(TTM_NAME, NULL); if (IS_ERR(ttm_debugfs_root)) { ttm_debugfs_root = NULL; } From 41b75974a03d85f9f4a552107628c753193b15e2 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 10 Aug 2022 18:53:07 -0400 Subject: [PATCH 19/56] drm/amdkfd: Try to schedule bottom half on same core On systems that support SMT (hyperthreading) schedule the bottom half of the KFD interrupt handler on the same core. This makes it possible to reserve a core for interrupt handling and have the bottom half run on that same core. On systems without SMT, pick another core in the same NUMA node, as before. Use for_each_cpu_wrap instead of open-coding it. 
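For readers unfamiliar with for_each_cpu_wrap(), the following standalone sketch (a userspace model with a plain bitmask standing in for a cpumask; the data set is invented for illustration) shows the wrap-around visiting order the new code relies on:

	#include <stdio.h>

	#define NBITS 8

	int main(void)
	{
		unsigned int online = 0x33;	/* CPUs {0, 1, 4, 5} online */
		int start = 2, i;

		/* Models for_each_cpu_wrap(cpu, mask, start): visit every set
		 * bit exactly once, beginning at 'start' and wrapping around. */
		for (i = 0; i < NBITS; i++) {
			int cpu = (start + i) % NBITS;

			if (online & (1u << cpu))
				printf("%d ", cpu);	/* prints: 4 5 0 1 */
		}
		printf("\n");
		return 0;
	}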
Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index f8545650c5900..bc99d2d490abf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" @@ -819,13 +820,24 @@ static inline void kfd_queue_work(struct workqueue_struct *wq, struct work_struct *work) { int cpu, new_cpu; + const struct cpumask *mask = NULL; cpu = new_cpu = smp_processor_id(); - do { - new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; - if (cpu_to_node(new_cpu) == numa_node_id()) + +#if defined(CONFIG_SCHED_SMT) + /* CPU threads in the same core */ + mask = cpu_smt_mask(cpu); +#endif + if (!mask || cpumask_weight(mask) <= 1) + /* CPU threads in the same NUMA node */ + mask = cpu_cpu_mask(cpu); + /* Pick the next online CPU thread in the same core or NUMA node */ + for_each_cpu_wrap(cpu, mask, cpu+1) { + if (cpu != new_cpu && cpu_online(cpu)) { + new_cpu = cpu; break; - } while (cpu != new_cpu); + } + } queue_work_on(new_cpu, wq, work); } From bb6ed25175c728b3b074f21f41c21fd5a74ffa0a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Aug 2022 09:20:58 +0300 Subject: [PATCH 20/56] drm/amdkfd: potential crash in kfd_create_indirect_link_prop() This code has two bugs. If kfd_topology_device_by_proximity_domain() failed on the first iteration through the loop then "cpu_link" is uninitialized and should not be dereferenced. The second bug is that we cannot dereference a list iterator when it points to the list head. In other words, if the list_for_each_entry() loop exits without hitting a break then "cpu_link" is not a valid pointer and should not be dereferenced. Fix both of these problems by setting "cpu_link" to NULL when it is invalid and non-NULL when it is valid. That makes it easier to test for valid vs invalid.
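The bug class is worth a minimal illustration. This generic sketch (not the patched function; the struct and helper are made up for the example) shows the safe pattern the fix adopts: record the match in a second pointer that stays NULL unless the loop breaks, and never dereference the iterator after the loop:

	#include <linux/list.h>

	struct item {
		struct list_head node;
		int id;
	};

	static struct item *find_item(struct list_head *head, int id)
	{
		struct item *iter, *found = NULL;

		list_for_each_entry(iter, head, node) {
			if (iter->id == id) {
				found = iter;	/* remember the real element */
				break;
			}
		}
		/* After a full, break-less iteration, 'iter' points at storage
		 * computed from the list head, not a real element, so only the
		 * NULL-or-valid 'found' may be returned and dereferenced. */
		return found;
	}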
Fixes: 0f28cca87e9a ("drm/amdkfd: Extend KFD device topology to surface peer-to-peer links") Signed-off-by: Dan Carpenter Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 779d3f2374664..f396b28327808 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1494,8 +1494,8 @@ static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev, static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node) { + struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link; struct kfd_iolink_properties *props = NULL, *props2 = NULL; - struct kfd_iolink_properties *gpu_link, *cpu_link; struct kfd_topology_device *cpu_dev; int ret = 0; int i, num_cpu; @@ -1518,16 +1518,19 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g continue; /* find CPU <--> CPU links */ + cpu_link = NULL; cpu_dev = kfd_topology_device_by_proximity_domain(i); if (cpu_dev) { - list_for_each_entry(cpu_link, + list_for_each_entry(tmp_link, &cpu_dev->io_link_props, list) { - if (cpu_link->node_to == gpu_link->node_to) + if (tmp_link->node_to == gpu_link->node_to) { + cpu_link = tmp_link; break; + } } } - if (cpu_link->node_to != gpu_link->node_to) + if (!cpu_link) return -ENOMEM; /* CPU <--> CPU <--> GPU, GPU node*/ From a01888f883c4480091558e41b5b00d7726da206c Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Fri, 29 Jul 2022 15:22:44 +0800 Subject: [PATCH 21/56] drm/amdgpu: enable IH Clock Gating for OSS IP v6.0.1 Enable AMD_CG_SUPPORT_IH_CG support. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 6c3440e7ed3f0..1ff7fc7bb3400 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -602,6 +602,7 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = From e8ff107d7279c8e74b5fdb21ac9f334a4ab95b2d Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Fri, 12 Aug 2022 11:37:37 +0800 Subject: [PATCH 22/56] drm/amd/pm: Enable GFXOFF feature for SMU IP v13.0.4 The driver needs to set EnableGfxImu message parameter to tell the PMFW to set the flag that enables the GFXOFF feature. 
Signed-off-by: Tim Huang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index e56ec06012ddc..3651f6f750680 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2345,8 +2345,8 @@ int smu_v13_0_set_gfx_power_up_by_imu(struct smu_context *smu) index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG, SMU_MSG_EnableGfxImu); - - return smu_cmn_send_msg_without_waiting(smu, index, 0); + /* Param 1 to tell PMFW to enable GFXOFF feature */ + return smu_cmn_send_msg_without_waiting(smu, index, 1); } int smu_v13_0_od_edit_dpm_table(struct smu_context *smu, From b73dde25ba90412da11f2119cad33457dd19b711 Mon Sep 17 00:00:00 2001 From: Asher Song Date: Mon, 15 Aug 2022 22:07:12 +0800 Subject: [PATCH 23/56] drm/amdkcl: fake macro for_each_cpu_wrap and function cpumask_next_wrap It's caused by 78231f639e2eec3f14de8bb8309f459413ca86b4 drm/amdkfd: Try to schedule bottom half on same core Signed-off-by: Asher Song Reviewed-by: Leslie Shi --- drivers/gpu/drm/amd/amdkcl/Makefile | 2 +- drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c | 38 +++++++++++++++++++++ drivers/gpu/drm/amd/backport/backport.h | 1 + include/kcl/kcl_cpumask.h | 43 ++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c create mode 100644 include/kcl/kcl_cpumask.h diff --git a/drivers/gpu/drm/amd/amdkcl/Makefile b/drivers/gpu/drm/amd/amdkcl/Makefile index 114f3861ac5ff..36e1f19709ea1 100644 --- a/drivers/gpu/drm/amd/amdkcl/Makefile +++ b/drivers/gpu/drm/amd/amdkcl/Makefile @@ -13,7 +13,7 @@ amdkcl-y += kcl_backlight.o kcl_ioctl.o \ kcl_device_cgroup.o kcl_mn.o kcl_drm_modes.o kcl_time.o kcl_ftrace.o \ kcl_acpi_table.o kcl_page_alloc.o kcl_numa.o kcl_fs_read_write.o kcl_drm_aperture.o \ kcl_drm_drv.o kcl_drm_simple_kms_helper.o kcl_bitmap.o kcl_vmscan.o kcl_dma_fence_chain.o \ - kcl_mce_amd.o kcl_workqueue.o + kcl_mce_amd.o kcl_workqueue.o kcl_cpumask.o amdkcl-$(CONFIG_DRM_AMD_DC_HDCP) += kcl_drm_hdcp.o amdkcl-$(CONFIG_MMU_NOTIFIER) += kcl_mn.o diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c b/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c new file mode 100644 index 0000000000000..fe36b386ff52b --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#ifndef for_each_cpu_wrap +/* copied from lib/cpumask.c */ +/** + * cpumask_next_wrap - helper to implement for_each_cpu_wrap + * @n: the cpu prior to the place to search + * @mask: the cpumask pointer + * @start: the start point of the iteration + * @wrap: assume @n crossing @start terminates the iteration + * + * Returns >= nr_cpu_ids on completion + * + * Note: the @wrap argument is required for the start condition when + * we cannot assume @start is set in @mask.
+ */ +int _kcl_cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) +{ + int next; + +again: + next = cpumask_next(n, mask); + + if (wrap && n < start && next >= start) { + return nr_cpumask_bits; + + } else if (next >= nr_cpumask_bits) { + wrap = true; + n = -1; + goto again; + } + + return next; +} +EXPORT_SYMBOL(_kcl_cpumask_next_wrap); +#endif + diff --git a/drivers/gpu/drm/amd/backport/backport.h b/drivers/gpu/drm/amd/backport/backport.h index 1992b972a1bdd..b89f0fe0664ee 100644 --- a/drivers/gpu/drm/amd/backport/backport.h +++ b/drivers/gpu/drm/amd/backport/backport.h @@ -105,5 +105,6 @@ #include #include #include +#include #endif /* AMDGPU_BACKPORT_H */ diff --git a/include/kcl/kcl_cpumask.h b/include/kcl/kcl_cpumask.h new file mode 100644 index 0000000000000..aee779d6ec5f2 --- /dev/null +++ b/include/kcl/kcl_cpumask.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include + +#ifndef for_each_cpu_wrap + +extern int _kcl_cpumask_next_wrap(int n, const struct cpumask *mask, + int start, bool wrap); + +static inline +int cpumask_next_wrap(int n, const struct cpumask *mask, + int start, bool wrap) +{ + return _kcl_cpumask_next_wrap(n, mask, start, wrap); +} + +/* Copied from include/linux/cpumask.h */ +#if NR_CPUS == 1 +#define for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start)) +#else +/** + * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * @start: the start location + * + * The implementation does not assume any bit in @mask is set (including @start). + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \ + (cpu) < nr_cpumask_bits; \ + (cpu) = cpumask_next_wrap((cpu), (mask), (start), true)) + +#endif +#endif + From 43c9838ac0fc30cfcc6d17c7b6a1b2500ecfdbd6 Mon Sep 17 00:00:00 2001 From: Ethan Wellenreiter Date: Fri, 29 Jul 2022 13:45:51 -0400 Subject: [PATCH 24/56] drm/amd/display: reverted limiting vscsdp_for_colorimetry and ARGB16161616 pixel format addition [WHY] Limiting vscsdp_for_colorimetry for YCbCr420/BT2020 resulted in red/green point failures in HDR10 DTN tests. The re-implementation of ARGB16161616 was meant to fix this; however, it did not actually fix the issue, only a side effect of it. [HOW] Change ARGB16161616 pixel format to 26.
Reviewed-by: Martin Leung Acked-by: Brian Chang Signed-off-by: Ethan Wellenreiter Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c | 2 -- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 3 --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c | 2 -- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c | 3 --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c | 2 -- 5 files changed, 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index d4a6504dfe000..db7ca4b0cdb9d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -361,8 +361,6 @@ void dpp1_cnv_setup ( select = INPUT_CSC_SELECT_ICSC; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - pixel_format = 22; - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: pixel_format = 26; /* ARGB16161616_UNORM */ break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index b54c124003237..564e061ccb589 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -278,9 +278,6 @@ void hubp1_program_pixel_format( SURFACE_PIXEL_FORMAT, 10); break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - REG_UPDATE(DCSURF_SURFACE_CONFIG, - SURFACE_PIXEL_FORMAT, 22); - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/ REG_UPDATE(DCSURF_SURFACE_CONFIG, SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index ea1f14af0db75..eaa7032f0f1a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -166,8 +166,6 @@ static void dpp2_cnv_setup ( select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - pixel_format = 22; - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: pixel_format = 26; /* ARGB16161616_UNORM */ break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 936af65381ef7..9570c2118ccc7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -463,9 +463,6 @@ void hubp2_program_pixel_format( SURFACE_PIXEL_FORMAT, 10); break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - REG_UPDATE(DCSURF_SURFACE_CONFIG, - SURFACE_PIXEL_FORMAT, 22); - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/ REG_UPDATE(DCSURF_SURFACE_CONFIG, SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index 77b00f86c2165..4a668d6563dfd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -244,8 +244,6 @@ void dpp3_cnv_setup ( select = INPUT_CSC_SELECT_ICSC; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - pixel_format = 22; - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: pixel_format = 26; /* ARGB16161616_UNORM */ break; From ffc503b2e77093ffc95a6717f7bd6fe38702c6b5 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 1 Aug 2022 20:17:54 -0400 Subject: [PATCH 25/56] drm/amd/display: 3.2.198 This version brings along following fixes: -Fix edp panel missing event -Set ARGB16161616 pixel format to 26 -Fix dcn32 
integer issue -Clear optc underflow bit after ODM clock off -Fix issue with stereo3D -Fix DML2 lightup issue -Correct DTBCLK for dcn314 -Revert for a regression -Fix clocks and bugs in DML2 -Enable SubVP by default on DCN32 & DCN321 -Correct boundary condition for engine ID on DCN303 -Fix FRL encoder override registry key -Fix VPG for dcn314 HPO -Fix Linux compile-time warning -Add new prefetch modes in DML for DCN32 Acked-by: Brian Chang Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6621f608b5a98..a85f485902835 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.197" +#define DC_VER "3.2.198" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 85d3ea60b821443438b532017f767f76adcc1f8e Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Tue, 26 Jul 2022 16:19:38 -0400 Subject: [PATCH 26/56] drm/amd/display: Fix pixel clock programming [Why] Some pixel clock values could cause HDMI TMDS SSCPs to be misaligned between different HDMI lanes when using YCbCr420 10-bit pixel format. BIOS functions for transmitter/encoder control take pixel clock in kHz increments, whereas the function for setting the pixel clock is in 100Hz increments. Setting the pixel clock to a value that is not on a kHz boundary will cause the issue. [How] Round pixel clock down to nearest kHz in 10/12-bpc cases. Reviewed-by: Aric Cyr Acked-by: Brian Chang Signed-off-by: Ilya Bakoulin Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index f8cf7e8d4bef8..49f2f46e0f652 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -543,9 +543,11 @@ static void dce112_get_pix_clk_dividers_helper ( switch (pix_clk_params->color_depth) { case COLOR_DEPTH_101010: actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2; + actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; break; case COLOR_DEPTH_121212: actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2; + actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; break; case COLOR_DEPTH_161616: actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2; From 525bb44de21e6663e598be1f303bcbc48384b31d Mon Sep 17 00:00:00 2001 From: Ian Chen Date: Tue, 2 Aug 2022 15:13:37 +0800 Subject: [PATCH 27/56] Add reserved dc_log_type.
Reviewed-by: Anthony Koo Acked-by: Brian Chang Signed-off-by: Ian Chen Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/include/logger_types.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index 5d19a43189000..d89cf3e76c6dc 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -121,13 +121,15 @@ enum dc_log_type { LOG_DSC, #endif LOG_SMU_MSG, + LOG_DC2RESERVED4, + LOG_DC2RESERVED5, LOG_DWB, LOG_GAMMA_DEBUG, LOG_MAX_HW_POINTS, LOG_ALL_TF_CHANNELS, LOG_SAMPLE_1DLUT, LOG_DP2, - LOG_SECTION_TOTAL_COUNT + LOG_DC2RESERVED12, }; #define DC_MIN_LOG_MASK ((1 << LOG_ERROR) | \ From 7f1ff9282269c53ff7de1316efd646a095c231cd Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Fri, 29 Jul 2022 11:08:09 -0400 Subject: [PATCH 28/56] drm/amd/display: do not compare integers of different widths [Why & How] Increase width of some variables to avoid comparing integers of different widths. Reviewed-by: Alvin Lee Acked-by: Brian Chang Signed-off-by: Josip Pavic Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index bb815654dbd22..6f1aef3d2bf85 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -332,7 +332,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) { union dmub_rb_cmd cmd; - uint8_t ways, i, j; + uint8_t ways, i; + int j; bool stereo_in_use = false; struct dc_plane_state *plane = NULL; From bcd197fd99274c72acf0f6a64a064591536428c6 Mon Sep 17 00:00:00 2001 From: Dusica Milinkovic Date: Wed, 10 Aug 2022 09:43:15 +0200 Subject: [PATCH 29/56] drm/amdgpu: Increase tlb flush timeout for sriov [Why] While multiple VFs were executing a benchmark (Luxmark), a KIQ error timeout was observed. It happens because all the VFs do the TLB invalidation at the same time. Although each VF has the invalidate register set, on the hardware side the invalidate requests are queued for execution.
[How] In the case of 12 VFs, increase the timeout to 12 * 100 ms (SRIOV_USEC_TIMEOUT = 1,200,000 us). Signed-off-by: Dusica Milinkovic Acked-by: Shaoyun Liu Acked-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 1372e2b475418..89d93833f49b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -321,7 +321,7 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; - +#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 482380fe76db3..750bb16dda36d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -418,6 +418,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -436,7 +437,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); return -ETIME; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1fca5d7dbd246..df70314b0d84b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -894,6 +894,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ?
SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -932,7 +933,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); return -ETIME; From b280b3501e55fbb251bc8ca726b28235814ed273 Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Tue, 2 Aug 2022 16:53:47 -0400 Subject: [PATCH 30/56] drm/amd/display: Add debug parameter to retain default clock table [Why] Need a way to retain the default clock table to aid the investigation into why an 8k@30 display is not lighting up on dcn314 [How] Use a flag to prevent execution of the bw_params helper function and the function for updating bw_bounding_box Reviewed-by: Nicholas Kazlauskas Reviewed-by: Jun Lei Acked-by: Brian Chang Signed-off-by: Daniel Miess Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 2 +- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 7af19823a29db..c74f2d5bbbc56 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -719,7 +719,7 @@ void dcn314_clk_mgr_construct( if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { dcn314_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks); - if (ctx->dc_bios && ctx->dc_bios->integrated_info) { + if (ctx->dc_bios && ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) { dcn314_clk_mgr_helper_populate_bw_params( &clk_mgr->base, ctx->dc_bios->integrated_info, diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index a85f485902835..14361bc91ccb1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -404,6 +404,7 @@ struct dc_config { bool use_pipe_ctx_sync_logic; bool ignore_dpref_ss; bool enable_mipi_converter_optimization; + bool use_default_clock_table; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index c80307a6af1bf..34a5d0f87b5f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -189,7 +189,7 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p dc_assert_fp_enabled(); // Default clock levels are used for diags, which may lead to overclocking. - if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) { dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; From 7e554766cc3bfb70dc8a0f442c81cfefcbef1901 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Wed, 3 Aug 2022 15:15:17 +0800 Subject: [PATCH 31/56] drm/amd/display: Fix plug/unplug external monitor will hang while playback MPO video [Why] Pipes for the MPO primary and overlay will be powered down and powered up when plugging/unplugging an external monitor during MPO video playback.
But the pipes are the same after the plug/unplug and should not need to be powered down and up, or the page flip interrupt will get disabled and cause a hang issue. [How] Add a pipe split change condition that checks not only the top pipe pointer but also the index of the top pipe when both top pipes are available. Reviewed-by: Sun peng Li Acked-by: Brian Chang Signed-off-by: Tom Chung Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/core/dc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index cf99097887076..719ba38a10b8a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1081,8 +1081,15 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) struct dc_stream_state *old_stream = dc->current_state->res_ctx.pipe_ctx[i].stream; bool should_disable = true; - bool pipe_split_change = - context->res_ctx.pipe_ctx[i].top_pipe != dc->current_state->res_ctx.pipe_ctx[i].top_pipe; + bool pipe_split_change = false; + + if ((context->res_ctx.pipe_ctx[i].top_pipe) && + (dc->current_state->res_ctx.pipe_ctx[i].top_pipe)) + pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe->pipe_idx != + dc->current_state->res_ctx.pipe_ctx[i].top_pipe->pipe_idx; + else + pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe != + dc->current_state->res_ctx.pipe_ctx[i].top_pipe; for (j = 0; j < context->stream_count; j++) { if (old_stream == context->streams[j]) { From af39b7703ec15503df6222d6245b642ac78b2370 Mon Sep 17 00:00:00 2001 From: Chaitanya Dhere Date: Wed, 27 Jul 2022 15:48:30 -0400 Subject: [PATCH 32/56] drm/amd/display: Modify header inclusion pattern [Why] A recent backport from opensource broke the Nightly tool build that tests DC and DML for bugs and regressions. This was because the backport contained a header inclusion that was not consistent with the AMD style of including headers, and it was allowed to be merged into the DML code, which caused tool compilation failures. [How] Modify the way in which the header file is included so that it is consistent with the AMD style of including headers. This then automatically fixes the tool compilation process and also helps maintain the code quality and consistency.
Reviewed-by: Alvin Lee Reviewed-by: Jun Lei Acked-by: Brian Chang Signed-off-by: Chaitanya Dhere Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c | 2 +- .../gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 3fab19134480d..d63b4209b14c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -26,7 +26,7 @@ #include "dc.h" #include "dc_link.h" #include "../display_mode_lib.h" -#include "dml/dcn30/display_mode_vba_30.h" +#include "../dcn30/display_mode_vba_30.h" #include "display_mode_vba_31.h" #include "../dml_inline_defs.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index 66b82e4f05c6e..35d10b4d018bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -27,7 +27,7 @@ #include "../display_mode_vba.h" #include "../dml_inline_defs.h" #include "display_rq_dlg_calc_31.h" -#include "dml/dcn30/display_mode_vba_30.h" +#include "../dcn30/display_mode_vba_30.h" static bool is_dual_plane(enum source_format_class source_format) { From 370365f75e1068727cc0e88f3a783ada8706cef1 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 5 Aug 2022 12:02:10 -0400 Subject: [PATCH 33/56] drm/amd/display: Update clock table policy for DCN314 [Why & How] Depending on how the clock table is constructed from PMFW we can run into issues where we don't think we have enough bandwidth available due to the FCLK being too low - e.g. when the FCLK table contains invalid entries or a single entry. We should always pick up the maximum clocks for each state as a final state in this case to prevent validation from failing if the table is malformed. We should also provide sensible defaults in the case where values are invalid. Redefine the clock table structures by adding a 314 prefix to make debugging these issues easier by avoiding symbol name clashes. Overall this policy more closely aligns with how we did things for 315, but because of how the voltage rail is set up we should favor keeping DCFCLK low rather than DISPCLK or DPPCLK - so use the max for those in every entry.
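As a rough model of this policy (illustrative only; the names and the tiny data set are invented, and the max-pstate entry and zero-clock fallbacks from the actual patch are omitted), each DCFCLK level is paired with the lowest FCLK p-state whose voltage can still support it, while DISPCLK and DPPCLK are pinned at their maximums:

	#include <stdio.h>

	int main(void)
	{
		/* hypothetical PMFW tables: clocks in MHz, voltages in mV */
		unsigned int dcfclk[3]   = { 400, 600, 800 };
		unsigned int soc_volt[3] = { 700, 800, 900 };
		unsigned int fclk[3]     = { 1600, 1000, 600 };
		unsigned int df_volt[3]  = { 900, 800, 700 };
		unsigned int max_dispclk = 1200, max_dppclk = 1100;
		int i, j;

		for (i = 0; i < 3; i++) {
			unsigned int min_fclk = fclk[0];

			/* lowest FCLK whose voltage fits this DCFCLK level */
			for (j = 1; j < 3; j++)
				if (fclk[j] && fclk[j] < min_fclk &&
				    df_volt[j] <= soc_volt[i])
					min_fclk = fclk[j];

			printf("state %d: dcfclk %u fclk %u dispclk %u dppclk %u\n",
			       i, dcfclk[i], min_fclk, max_dispclk, max_dppclk);
		}
		return 0;
	}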
Reviewed-by: Daniel Miess Acked-by: Brian Chang Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler --- .../dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 186 ++++++++++++------ .../display/dc/clk_mgr/dcn314/dcn314_smu.h | 33 +++- 2 files changed, 154 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index c74f2d5bbbc56..beb025cd3dc29 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -415,7 +415,7 @@ static struct wm_table lpddr5_wm_table = { } }; -static DpmClocks_t dummy_clocks; +static DpmClocks314_t dummy_clocks; static struct dcn314_watermarks dummy_wms = { 0 }; @@ -500,7 +500,7 @@ static void dcn314_notify_wm_ranges(struct clk_mgr *clk_mgr_base) static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, struct dcn314_smu_dpm_clks *smu_dpm_clks) { - DpmClocks_t *table = smu_dpm_clks->dpm_clks; + DpmClocks314_t *table = smu_dpm_clks->dpm_clks; if (!clk_mgr->smu_ver) return; @@ -517,6 +517,26 @@ static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, dcn314_smu_transfer_dpm_table_smu_2_dram(clk_mgr); } +static inline bool is_valid_clock_value(uint32_t clock_value) +{ + return clock_value > 1 && clock_value < 100000; +} + +static unsigned int convert_wck_ratio(uint8_t wck_ratio) +{ + switch (wck_ratio) { + case WCK_RATIO_1_2: + return 2; + + case WCK_RATIO_1_4: + return 4; + + default: + break; + } + return 1; +} + static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks) { uint32_t max = 0; @@ -530,89 +550,127 @@ static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks) return max; } -static unsigned int find_clk_for_voltage( - const DpmClocks_t *clock_table, - const uint32_t clocks[], - unsigned int voltage) -{ - int i; - int max_voltage = 0; - int clock = 0; - - for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) { - if (clock_table->SocVoltage[i] == voltage) { - return clocks[i]; - } else if (clock_table->SocVoltage[i] >= max_voltage && - clock_table->SocVoltage[i] < voltage) { - max_voltage = clock_table->SocVoltage[i]; - clock = clocks[i]; - } - } - - ASSERT(clock); - return clock; -} - static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr, struct integrated_info *bios_info, - const DpmClocks_t *clock_table) + const DpmClocks314_t *clock_table) { - int i, j; struct clk_bw_params *bw_params = clk_mgr->base.bw_params; - uint32_t max_dispclk = 0, max_dppclk = 0; - - j = -1; - - ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL); - - /* Find lowest DPM, FCLK is filled in reverse order*/ + struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; + uint32_t max_pstate = 0, max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0; + int i; - for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) { - if (clock_table->DfPstateTable[i].FClk != 0) { - j = i; - break; + /* Find highest valid fclk pstate */ + for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) { + if (is_valid_clock_value(clock_table->DfPstateTable[i].FClk) && + clock_table->DfPstateTable[i].FClk > max_fclk) { + max_fclk = clock_table->DfPstateTable[i].FClk; + max_pstate = i; } } - if (j == -1) { - /* clock table is all 0s, just use our own hardcode */ - ASSERT(0); - return; - } - - bw_params->clk_table.num_entries = j + 1; + /* We expect the table to contain 
at least one valid fclk entry. */ + ASSERT(is_valid_clock_value(max_fclk)); - /* dispclk and dppclk can be max at any voltage, same number of levels for both */ + /* Dispclk and dppclk can be max at any voltage, same number of levels for both */ if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) { max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); } else { + /* Invalid number of entries in the table from PMFW. */ ASSERT(0); } - for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) { - bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk; - bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk; - bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage; - switch (clock_table->DfPstateTable[j].WckRatio) { - case WCK_RATIO_1_2: - bw_params->clk_table.entries[i].wck_ratio = 2; - break; - case WCK_RATIO_1_4: - bw_params->clk_table.entries[i].wck_ratio = 4; - break; - default: - bw_params->clk_table.entries[i].wck_ratio = 1; + /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */ + for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + uint32_t min_fclk = clock_table->DfPstateTable[0].FClk; + int j; + + for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) { + if (is_valid_clock_value(clock_table->DfPstateTable[j].FClk) && + clock_table->DfPstateTable[j].FClk < min_fclk && + clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]) { + min_fclk = clock_table->DfPstateTable[j].FClk; + min_pstate = j; + } } - bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage); - bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage); + + /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */ + for (j = bw_params->clk_table.num_entries - 1; j > 0; j--) + if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i]) + break; + + bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz; + bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz; + bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz; + + /* Now update clocks we do read */ + bw_params->clk_table.entries[i].fclk_mhz = min_fclk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i]; + bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i]; + bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; + bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; + bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio( + clock_table->DfPstateTable[min_pstate].WckRatio); + }; + + /* Make sure to include at least one entry at highest pstate */ + if (max_pstate != min_pstate || i == 0) { + if (i > MAX_NUM_DPM_LVL - 1) + i = MAX_NUM_DPM_LVL - 1; + + bw_params->clk_table.entries[i].fclk_mhz = max_fclk; + bw_params->clk_table.entries[i].memclk_mhz = 
clock_table->DfPstateTable[max_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; + bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio( + clock_table->DfPstateTable[max_pstate].WckRatio); + i++; } + bw_params->clk_table.num_entries = i--; + + /* Make sure all highest clocks are included*/ + bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS); + ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS)); + bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz; + bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz; + bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; + /* + * Set any 0 clocks to max default setting. Not an issue for + * power since we aren't doing switching in such case anyway + */ + for (i = 0; i < bw_params->clk_table.num_entries; i++) { + if (!bw_params->clk_table.entries[i].fclk_mhz) { + bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz; + bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz; + bw_params->clk_table.entries[i].voltage = def_max.voltage; + } + if (!bw_params->clk_table.entries[i].dcfclk_mhz) + bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz) + bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz; + if (!bw_params->clk_table.entries[i].dispclk_mhz) + bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz; + if (!bw_params->clk_table.entries[i].dppclk_mhz) + bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz; + if (!bw_params->clk_table.entries[i].phyclk_mhz) + bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz; + if (!bw_params->clk_table.entries[i].phyclk_d18_mhz) + bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz; + if (!bw_params->clk_table.entries[i].dtbclk_mhz) + bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; + } + ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); bw_params->vram_type = bios_info->memory_type; - bw_params->num_channels = bios_info->ma_channel_number; + bw_params->num_channels = bios_info->ma_channel_number ? 
bios_info->ma_channel_number : 4; for (i = 0; i < WM_SET_COUNT; i++) { bw_params->wm_table.entries[i].wm_inst = i; @@ -671,10 +729,10 @@ void dcn314_clk_mgr_construct( } ASSERT(clk_mgr->smu_wm_set.wm_set); - smu_dpm_clks.dpm_clks = (DpmClocks_t *)dm_helpers_allocate_gpu_mem( + smu_dpm_clks.dpm_clks = (DpmClocks314_t *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER, - sizeof(DpmClocks_t), + sizeof(DpmClocks314_t), &smu_dpm_clks.mc_address.quad_part); if (smu_dpm_clks.dpm_clks == NULL) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h index a7958dc965810..047d19ea919c7 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h @@ -36,6 +36,37 @@ typedef enum { WCK_RATIO_MAX } WCK_RATIO_e; +typedef struct { + uint32_t FClk; + uint32_t MemClk; + uint32_t Voltage; + uint8_t WckRatio; + uint8_t Spare[3]; +} DfPstateTable314_t; + +//Freq in MHz //Voltage in milli volts with 2 fractional bits +typedef struct { + uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS]; + uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS]; + uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS]; + uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS]; + uint32_t VClocks[NUM_VCN_DPM_LEVELS]; + uint32_t DClocks[NUM_VCN_DPM_LEVELS]; + uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS]; + DfPstateTable314_t DfPstateTable[NUM_DF_PSTATE_LEVELS]; + + uint8_t NumDcfClkLevelsEnabled; + uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk + uint8_t NumSocClkLevelsEnabled; + uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk + uint8_t NumDfPstatesEnabled; + uint8_t spare[3]; + + uint32_t MinGfxClk; + uint32_t MaxGfxClk; +} DpmClocks314_t; + struct dcn314_watermarks { // Watermarks WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES]; uint32_t MmHubPadding[7]; }; struct dcn314_smu_dpm_clks { - DpmClocks_t *dpm_clks; + DpmClocks314_t *dpm_clks; union large_integer mc_address; }; From 6101e215115fbcdd708282abf41ecc98be0d650d Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Fri, 5 Aug 2022 16:18:31 -0400 Subject: [PATCH 34/56] drm/amd/display: Don't set DSC for phantom pipes [Description] Don't set the DSC bit for phantom pipes; it's not required since phantom pipes don't have any actual output Reviewed-by: Jun Lei Acked-by: Brian Chang Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index a51e74344698e..0813f4fdb68c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -493,6 +493,7 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, phantom_stream->timing.v_front_porch + phantom_stream->timing.v_sync_width + phantom_bp; + phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing } /** From b705377b7c62368fe2a985f39f69c921b4be2e73 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Sat, 6 Aug 2022 12:00:26 -0400 Subject: [PATCH 35/56] drm/amd/display: Use pitch when calculating size to cache in MALL [Description] Use pitch when calculating size to cache in MALL Reviewed-by: Samson Tam Acked-by: Brian Chang Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 2 +- 1 file
changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index fba95082b5c36..e59aad43ea0b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -63,7 +63,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat if (pipe->stream && pipe->plane_state && !pipe->top_pipe && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; - mall_region_pixels = pipe->stream->timing.h_addressable * pipe->stream->timing.v_addressable; + mall_region_pixels = pipe->plane_state->plane_size.surface_pitch * pipe->stream->timing.v_addressable; // For bytes required in MALL, calculate based on number of MBlks required num_mblks = (mall_region_pixels * bytes_per_pixel + From f81d0bf9aac4b3cf685fe26ec791a46a1b70b83c Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Fri, 5 Aug 2022 12:59:47 -0400 Subject: [PATCH 36/56] drm/amd/display: avoid doing vm_init multiple times [why] This is to ensure that the driver will not reprogram hvm_prefetch_req again if it is already done. Reviewed-by: Martin Leung Acked-by: Brian Chang Signed-off-by: Charlene Liu Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c index c5e200d09038f..5752271f22dfe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c @@ -67,9 +67,15 @@ static uint32_t convert_and_clamp( void dcn21_dchvm_init(struct hubbub *hubbub) { struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); - uint32_t riommu_active; + uint32_t riommu_active, prefetch_done; int i; + REG_GET(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, &prefetch_done); + + if (prefetch_done) { + hubbub->riommu_active = true; + return; + } //Init DCHVM block REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1); From 3d4ca22839c0b1e388c217af543dd16c10275f3c Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Mon, 25 Jul 2022 14:04:26 -0400 Subject: [PATCH 37/56] drm/amd/display: add chip revision to DCN32 [Why & How] Add GC_11_0_3_A0 as a chip revision to the DCN32 family Reviewed-by: Rodrigo Siqueira Acked-by: Brian Chang Signed-off-by: Samson Tam Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/include/dal_asic_id.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index e054f3494087f..9f3558c0ef110 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -247,10 +247,12 @@ enum { #define AMDGPU_FAMILY_GC_11_0_1 148 #define GC_11_0_0_A0 0x1 #define GC_11_0_2_A0 0x10 +#define GC_11_0_3_A0 0x20 #define GC_11_UNKNOWN 0xFF #define ASICREV_IS_GC_11_0_0(eChipRev) (eChipRev < GC_11_0_2_A0) -#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_UNKNOWN) +#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_0_3_A0) +#define ASICREV_IS_GC_11_0_3(eChipRev) (eChipRev >= GC_11_0_3_A0 && eChipRev < GC_11_UNKNOWN) /* * ASIC chip ID From 64dfb569f9350f7f8b5ab630dce9f1c55dd13561 Mon Sep
17 00:00:00 2001 From: Harish Kasiviswanathan Date: Fri, 5 Aug 2022 18:04:08 -0400 Subject: [PATCH 38/56] drm/amdgpu: Add decode_iv_ts helper for ih_v6 block Signed-off-by: Harish Kasiviswanathan Reviewed-by: Hawking Zhang --- drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 92dc60a9d2094..085e613f3646d 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -727,6 +727,7 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = { static const struct amdgpu_ih_funcs ih_v6_0_funcs = { .get_wptr = ih_v6_0_get_wptr, .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, .set_rptr = ih_v6_0_set_rptr }; From 294efaecfba9cd046652353a78a6f48c2505cbe8 Mon Sep 17 00:00:00 2001 From: Andrey Strachuk Date: Tue, 19 Jul 2022 13:39:54 +0300 Subject: [PATCH 39/56] drm/amdgpu: remove useless condition in amdgpu_job_stop_all_jobs_on_sched() Local variable 'rq' is initialized with the address of an array member of struct drm_gpu_scheduler, so it can never be NULL and it does not make sense to compare 'rq' with NULL. Found by Linux Verification Center (linuxtesting.org) with SVACE. Reviewed-by: Andrey Grodzovsky Signed-off-by: Andrey Strachuk Fixes: 7c6e68c777f1 ("drm/amdgpu: Avoid HW GPU reset for RAS.") Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 723721bdd6bf9..1d65e638f376f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -279,10 +279,6 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) /* Signal all jobs not yet scheduled */ for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { struct drm_sched_rq *rq = &sched->sched_rq[i]; - - if (!rq) - continue; - spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { From 7e6583a80840999c1e822ea688b15324153fcf7b Mon Sep 17 00:00:00 2001 From: min tang Date: Sat, 13 Aug 2022 23:10:46 +0800 Subject: [PATCH 40/56] drm/amd/display: Fix comment typo The word `aligned' is duplicated on line 1070; remove one. Signed-off-by: min tang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 0877ab143b98b..1466c156847ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -1086,7 +1086,7 @@ static void optc1_set_test_pattern( src_color[index] >> (src_bpc - dst_bpc); /* CRTC_TEST_PATTERN_DATA has 16 bits, * lowest 6 are hardwired to ZERO - * color bits should be left aligned aligned to MSB + * color bits should be left aligned to MSB * XXXXXXXXXX000000 for 10 bit, * XXXXXXXX00000000 for 8 bit and XXXXXX0000000000 for 6 */ From 1f224226b60ca3a8e319d93da2f5817b99bbfebd Mon Sep 17 00:00:00 2001 From: min tang Date: Sat, 13 Aug 2022 23:34:21 +0800 Subject: [PATCH 41/56] drm/amd/display: Unneeded semicolon There is no need for a semicolon after '}' on line 510.
Signed-off-by: min tang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index cc076621f5e66..bff0f57e7fe61 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -507,7 +507,7 @@ static void dcn315_clk_mgr_helper_populate_bw_params( bw_params->clk_table.entries[i].dispclk_mhz = clock_table->DispClocks[i]; bw_params->clk_table.entries[i].dppclk_mhz = clock_table->DppClocks[i]; bw_params->clk_table.entries[i].wck_ratio = 1; - }; + } /* Make sure to include at least one entry and highest pstate */ if (max_pstate != min_pstate || i == 0) { From ba2e0ec9378ec7515c169a27897a823cb883e572 Mon Sep 17 00:00:00 2001 From: David Gow Date: Thu, 11 Aug 2022 17:43:26 -0300 Subject: [PATCH 42/56] drm/amd/display: fix overflow on MIN_I64 definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The definition of MIN_I64 in bw_fixed.c can cause gcc to whinge about integer overflow, because it is treated as a positive value, which is then negated. The temporary positive value is not necessarily representable. This causes the following warning: ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/calcs/bw_fixed.c:30:19: warning: integer overflow in expression ‘-9223372036854775808’ of type ‘long long int’ results in ‘-9223372036854775808’ [-Woverflow] 30 | (int64_t)(-(1LL << 63)) | ^ Writing out (-MAX_I64 - 1) works instead. Signed-off-by: David Gow Signed-off-by: Tales Aparecida Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c index 6ca288fb5fb9e..2d46bc527b218 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c @@ -26,12 +26,12 @@ #include "bw_fixed.h" -#define MIN_I64 \ - (int64_t)(-(1LL << 63)) - #define MAX_I64 \ (int64_t)((1ULL << 63) - 1) +#define MIN_I64 \ + (-MAX_I64 - 1) + #define FRACTIONAL_PART_MASK \ ((1ULL << BW_FIXED_BITS_PER_FRACTIONAL_PART) - 1) From 087e17d7cba36aed63bf8b112eb0e34ef1643bf0 Mon Sep 17 00:00:00 2001 From: Tales Aparecida Date: Thu, 11 Aug 2022 17:43:27 -0300 Subject: [PATCH 43/56] drm/amd/display: fix minor codestyle problems Fixes five checkpatch warnings: CHECK: Please don't use multiple blank lines + + ERROR: Macros with complex values should be enclosed in parentheses +#define MAX_I64 \ + (int64_t)((1ULL << 63) - 1) WARNING: Missing a blank line after declarations + struct bw_fixed res; + ASSERT(value < BW_FIXED_MAX_I32 && value > BW_FIXED_MIN_I32); ERROR: that open brace { should be on the previous line + do + { ERROR: that open brace { should be on the previous line + if (remainder >= arg2_value) + { Signed-off-by: Tales Aparecida Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c index 2d46bc527b218..3aa8dd0acd5e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c +++ 
b/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c @@ -25,9 +25,8 @@ #include "dm_services.h" #include "bw_fixed.h" - #define MAX_I64 \ - (int64_t)((1ULL << 63) - 1) + ((int64_t)((1ULL << 63) - 1)) #define MIN_I64 \ (-MAX_I64 - 1) @@ -49,6 +48,7 @@ static uint64_t abs_i64(int64_t arg) struct bw_fixed bw_int_to_fixed_nonconst(int64_t value) { struct bw_fixed res; + ASSERT(value < BW_FIXED_MAX_I32 && value > BW_FIXED_MIN_I32); res.value = value << BW_FIXED_BITS_PER_FRACTIONAL_PART; return res; @@ -78,14 +78,12 @@ struct bw_fixed bw_frc_to_fixed(int64_t numerator, int64_t denominator) { uint32_t i = BW_FIXED_BITS_PER_FRACTIONAL_PART; - do - { + do { remainder <<= 1; res_value <<= 1; - if (remainder >= arg2_value) - { + if (remainder >= arg2_value) { res_value |= 1; remainder -= arg2_value; } From ecf1e711fa64947e480a89689e90c17ccdefb8ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Wed, 10 Aug 2022 20:28:55 -0300 Subject: [PATCH 44/56] drm/amd: Add detailed GFXOFF stats to debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add debugfs interface to log GFXOFF statistics: - Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the time of query since system power-up - Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop. Read it to get average GFXOFF residency % multiplied by 100 during the last logging interval. Both features are designed to keep the values persistent across suspend. Signed-off-by: André Almeida Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 168 ++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 39 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 6 + drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 45 +++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 3 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 33 ++++ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 22 +++ drivers/gpu/drm/amd/pm/swsmu/smu_internal.h | 3 + 9 files changed, 321 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 27670770b384e..dd78fb44cc135 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1061,6 +1061,157 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, return r; } +/** + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * Read the last residency value logged. It doesn't auto-update; one needs to + * stop logging before getting the current value.
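+ * (Write a 32-bit zero to amdgpu_gfxoff_residency to stop logging first.)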
+ */ +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + uint32_t value; + + r = amdgpu_get_gfx_off_residency(adev, &value); + if (r) + goto out; + + r = put_user(value, (uint32_t *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +/** + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop + */ +static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u32 value; + + r = get_user(value, (uint32_t *)buf); + if (r) + goto out; + + amdgpu_set_gfx_off_residency(adev, value ? true : false); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + + +/** + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + */ +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u64 value = 0; + + r = amdgpu_get_gfx_off_entrycount(adev, &value); + if (r) + goto out; + + r = put_user(value, (u64 *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + /** * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF * @@ -1268,6 +1419,19 @@ static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_count_read, + .llseek = default_llseek +}; + +static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_residency_read, + .write = amdgpu_debugfs_gfxoff_residency_write, + .llseek = default_llseek +}; + static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, @@ -1280,6 +1444,8 @@ static const 
struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_gpr_fops, &amdgpu_debugfs_gfxoff_fops, &amdgpu_debugfs_gfxoff_status_fops, + &amdgpu_debugfs_gfxoff_count_fops, + &amdgpu_debugfs_gfxoff_residency_fops, }; static const char *debugfs_regs_names[] = { @@ -1294,6 +1460,8 @@ static const char *debugfs_regs_names[] = { "amdgpu_gpr", "amdgpu_gfxoff", "amdgpu_gfxoff_status", + "amdgpu_gfxoff_count", + "amdgpu_gfxoff_residency", }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ebb722811dcf7..2c6b9cc810458 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3594,6 +3594,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); adev->gfx.gfx_off_req_count = 1; + adev->gfx.gfx_off_residency = 0; + adev->gfx.gfx_off_entrycount = 0; adev->pm.ac_power = power_supply_is_system_supplied() > 0; atomic_set(&adev->throttling_logging_enabled, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 454a78ba60d43..ceb91469958aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) mutex_unlock(&adev->gfx.gfx_off_mutex); } +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_set_residency_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_get_residency_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_get_entrycount_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 1581067698cab..1f4481c13c58a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -347,6 +347,8 @@ struct amdgpu_gfx { struct mutex gfx_off_mutex; uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */ struct delayed_work gfx_off_delay_work; + uint32_t gfx_off_residency; + uint64_t gfx_off_entrycount; /* pipe reservation */ struct mutex pipe_reserve_mutex; @@ -418,6 +420,10 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value); int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value); +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency); +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value); int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry); diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 956b6ce81c846..1b300c569faf5 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, return ret; } +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = 0; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_set_residency_gfxoff(smu, value); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = 0; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_get_residency_gfxoff(smu, value); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u64 *value) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = 0; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_get_entrycount_gfxoff(smu, value); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value) { struct smu_context *smu = adev->powerplay.pp_handle; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 65624d091ed2d..cb5b9df78b4db 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev, int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev); int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum smu_event_type event, uint64_t event_arg); +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value); +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value); +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u64 *value); int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value); uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct amdgpu_device *adev); void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 7510d470b8643..55b7910b43852 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void *handle, return smu_set_pp_feature_mask(smu, new_mask); } +int smu_set_residency_gfxoff(struct smu_context *smu, bool value) +{ + if (!smu->ppt_funcs->set_gfx_off_residency) + return -EINVAL; + + return smu_set_gfx_off_residency(smu, value); +} + +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value) +{ + if (!smu->ppt_funcs->get_gfx_off_residency) + return -EINVAL; + + return smu_get_gfx_off_residency(smu, value); +} + +int smu_get_entrycount_gfxoff(struct smu_context *smu, u64 *value) +{ + if (!smu->ppt_funcs->get_gfx_off_entrycount) + return -EINVAL; + + return smu_get_gfx_off_entrycount(smu, value); +} + int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value) { if (!smu->ppt_funcs->get_gfx_off_status) @@ -1576,6 +1600,7 @@ static int smu_suspend(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = 
adev->powerplay.pp_handle; int ret; + uint64_t count; if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev)) return 0; @@ -1593,6 +1618,14 @@ static int smu_suspend(void *handle) smu_set_gfx_cgpg(smu, false); + /* + * pwfw resets entrycount when device is suspended, so we save the + * last value to be used when we resume to keep it consistent + */ + ret = smu_get_entrycount_gfxoff(smu, &count); + if (!ret) + adev->gfx.gfx_off_entrycount = count; + return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index b81c657c73860..e2fa3b066b968 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1111,6 +1111,22 @@ struct pptable_funcs { */ uint32_t (*get_gfx_off_status)(struct smu_context *smu); + /** + * @gfx_off_entrycount: total GFXOFF entry count at the time of + * query since system power-up + */ + u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint64_t *entrycount); + + /** + * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging + */ + u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start); + + /** + * @get_gfx_off_residency: Average GFXOFF residency % during the logging interval + */ + u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t *residency); + /** * @register_irq_handler: Register interupt request handlers. */ @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu); int smu_allow_xgmi_power_down(struct smu_context *smu, bool en); +int smu_get_entrycount_gfxoff(struct smu_context *smu, u64 *value); + +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value); + +int smu_set_residency_gfxoff(struct smu_context *smu, bool value); + int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value); int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h index 7469bbfce1fb0..ceb13c8380679 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h @@ -47,6 +47,9 @@ #define smu_notify_memory_pool_location(smu) smu_ppt_funcs(notify_memory_pool_location, 0, smu) #define smu_gfx_off_control(smu, enable) smu_ppt_funcs(gfx_off_control, 0, smu, enable) #define smu_get_gfx_off_status(smu) smu_ppt_funcs(get_gfx_off_status, 0, smu) +#define smu_get_gfx_off_entrycount(smu, value) smu_ppt_funcs(get_gfx_off_entrycount, 0, smu, value) +#define smu_get_gfx_off_residency(smu, value) smu_ppt_funcs(get_gfx_off_residency, 0, smu, value) +#define smu_set_gfx_off_residency(smu, value) smu_ppt_funcs(set_gfx_off_residency, 0, smu, value) #define smu_set_last_dcef_min_deep_sleep_clk(smu) smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu) #define smu_system_features_control(smu, en) smu_ppt_funcs(system_features_control, 0, smu, en) #define smu_init_max_sustainable_clocks(smu) smu_ppt_funcs(init_max_sustainable_clocks, 0, smu) From 7f3d5f1ceed45061706d5adbbaf0ad706b43f167 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Wed, 10 Aug 2022 20:28:56 -0300 Subject: [PATCH 45/56] drm/amd/pm: Implement GFXOFF's entry count and residency for vangogh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement functions to get and set GFXOFF's entry count and residency for vangogh. 
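As a reading aid, here is a simplified standalone sketch (hypothetical names, not the driver code) of the bookkeeping this patch and the previous one rely on: the firmware's counter resets across suspend, so the driver banks the running total at suspend and adds it to later firmware readings:

#include <stdint.h>

/* Stand-in for the PPSMC_MSG_GetGfxOffEntryCount query; the real driver
 * goes through smu_cmn_send_smc_msg(). */
static uint32_t fw_read_entrycount(void)
{
	return 0; /* stub */
}

struct gfxoff_accum {
	uint64_t banked; /* running total saved at suspend */
};

/* Mirrors "*entrycount = value + adev->gfx.gfx_off_entrycount". */
static uint64_t gfxoff_total(const struct gfxoff_accum *a)
{
	return (uint64_t)fw_read_entrycount() + a->banked;
}

/* Called on suspend, before the firmware counter is lost. */
static void gfxoff_bank_on_suspend(struct gfxoff_accum *a)
{
	a->banked = gfxoff_total(a);
}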
Signed-off-by: André Almeida Signed-off-by: Alex Deucher --- .../pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h | 5 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 5 +- .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 76 +++++++++++++++++++ 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h index fe130a497d6c3..7471e2df28285 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h @@ -108,7 +108,10 @@ #define PPSMC_MSG_SetSlowPPTLimit 0x4A #define PPSMC_MSG_GetFastPPTLimit 0x4B #define PPSMC_MSG_GetSlowPPTLimit 0x4C -#define PPSMC_Message_Count 0x4D +#define PPSMC_MSG_GetGfxOffStatus 0x50 +#define PPSMC_MSG_GetGfxOffEntryCount 0x51 +#define PPSMC_MSG_LogGfxOffResidency 0x52 +#define PPSMC_Message_Count 0x53 //Argument for PPSMC_MSG_GfxDeviceDriverReset enum { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 28f6a1eb6945c..58098b82df660 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -236,7 +236,10 @@ __SMU_DUMMY_MAP(HeavySBR), \ __SMU_DUMMY_MAP(SetBadHBMPagesRetiredFlagsPerChannel), \ __SMU_DUMMY_MAP(EnableGfxImu), \ - __SMU_DUMMY_MAP(DriverMode2Reset), + __SMU_DUMMY_MAP(DriverMode2Reset), \ + __SMU_DUMMY_MAP(GetGfxOffStatus), \ + __SMU_DUMMY_MAP(GetGfxOffEntryCount), \ + __SMU_DUMMY_MAP(LogGfxOffResidency), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 89504ff8e9ed7..847990145dcd9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -138,6 +138,9 @@ static struct cmn2asic_msg_mapping vangogh_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(SetSlowPPTLimit, PPSMC_MSG_SetSlowPPTLimit, 0), MSG_MAP(GetFastPPTLimit, PPSMC_MSG_GetFastPPTLimit, 0), MSG_MAP(GetSlowPPTLimit, PPSMC_MSG_GetSlowPPTLimit, 0), + MSG_MAP(GetGfxOffStatus, PPSMC_MSG_GetGfxOffStatus, 0), + MSG_MAP(GetGfxOffEntryCount, PPSMC_MSG_GetGfxOffEntryCount, 0), + MSG_MAP(LogGfxOffResidency, PPSMC_MSG_LogGfxOffResidency, 0), }; static struct cmn2asic_mapping vangogh_feature_mask_map[SMU_FEATURE_COUNT] = { @@ -2200,6 +2203,76 @@ static int vangogh_set_power_limit(struct smu_context *smu, return ret; } +/** + * vangogh_set_gfxoff_residency + * + * @smu: amdgpu_device pointer + * @start: start/stop residency log + * + * This function will be used to log gfxoff residency + * + * + * Returns standard response codes. + */ +static u32 vangogh_set_gfxoff_residency(struct smu_context *smu, bool start) +{ + int ret = 0; + u32 residency; + struct amdgpu_device *adev = smu->adev; + + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_LogGfxOffResidency, + start, &residency); + + if (!start) + adev->gfx.gfx_off_residency = residency; + + return ret; +} + +/** + * vangogh_get_gfxoff_residency + * + * @smu: amdgpu_device pointer + * + * This function will be used to get gfxoff residency. + * + * Returns standard response codes. 
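+ * + * Note: this returns the value cached in adev->gfx.gfx_off_residency by the last logging stop; it does not query the firmware directly.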
+ */ +static u32 vangogh_get_gfxoff_residency(struct smu_context *smu, uint32_t *residency) +{ + struct amdgpu_device *adev = smu->adev; + + *residency = adev->gfx.gfx_off_residency; + + return 0; +} + +/** + * vangogh_get_gfxoff_entrycount - get gfxoff entry count + * + * @smu: amdgpu_device pointer + * + * This function will be used to get gfxoff entry count + * + * Returns standard response codes. + */ +static u32 vangogh_get_gfxoff_entrycount(struct smu_context *smu, uint64_t *entrycount) +{ + int ret = 0, value = 0; + struct amdgpu_device *adev = smu->adev; + + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return 0; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetGfxOffEntryCount, &value); + *entrycount = value + adev->gfx.gfx_off_entrycount; + + return ret; +} + static const struct pptable_funcs vangogh_ppt_funcs = { .check_fw_status = smu_v11_0_check_fw_status, @@ -2237,6 +2310,9 @@ static const struct pptable_funcs vangogh_ppt_funcs = { .mode2_reset = vangogh_mode2_reset, .gfx_off_control = smu_v11_0_gfx_off_control, .get_gfx_off_status = vangogh_get_gfxoff_status, + .get_gfx_off_entrycount = vangogh_get_gfxoff_entrycount, + .get_gfx_off_residency = vangogh_get_gfxoff_residency, + .set_gfx_off_residency = vangogh_set_gfxoff_residency, .get_ppt_limit = vangogh_get_ppt_limit, .get_power_limit = vangogh_get_power_limit, .set_power_limit = vangogh_set_power_limit, From e0336321713b6ca3a85430dd35cab7e611b933c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Wed, 10 Aug 2022 20:28:57 -0300 Subject: [PATCH 46/56] Documentation/gpu: Document GFXOFF's count and residency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add documentation explaining those two new files. While here, add a note about the value type. Signed-off-by: André Almeida Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/thermal.rst | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst index 8aeb0186c9ef8..5e27e4eb39596 100644 --- a/Documentation/gpu/amdgpu/thermal.rst +++ b/Documentation/gpu/amdgpu/thermal.rst @@ -63,3 +63,60 @@ gpu_metrics .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c :doc: gpu_metrics + +GFXOFF +====== + +GFXOFF is a feature found in most recent GPUs that saves power at runtime. The +card's RLC (RunList Controller) firmware powers off the gfx engine +dynamically when there is no workload on gfx or compute pipes. GFXOFF is on by +default on supported GPUs. + +Userspace can interact with GFXOFF through a debugfs interface (all values in +`uint32_t`, unless otherwise noted): + +``amdgpu_gfxoff`` +----------------- + +Use it to enable/disable GFXOFF, and to check if it's currently enabled or disabled:: + + $ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff + 01 + +- Write 0 to disable it, and 1 to enable it. +- Read 0 means it's disabled, 1 it's enabled. + +If it's enabled, that means the GPU is free to enter GFXOFF mode as +needed. Disabled means that it will never enter GFXOFF mode. + +``amdgpu_gfxoff_status`` +------------------------ + +Read it to check the current GFXOFF status of a GPU:: + + $ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff_status + 02 + +- 0: GPU is in GFXOFF state, the gfx engine is powered down. +- 1: Transition out of GFXOFF state +- 2: Not in GFXOFF state +- 3: Transition into GFXOFF state + +If GFXOFF is enabled, the value will transition within [0, 3], returning to 0 whenever possible.
When it's disabled, it's always at 2. Returns +``-EINVAL`` if it's not supported. + +``amdgpu_gfxoff_count`` +----------------------- + +Read it to get the total GFXOFF entry count at the time of query since system +power-up. The value is a `uint64_t` type; however, due to firmware limitations, +it can currently overflow as a `uint32_t`. *Only supported in vangogh* + +``amdgpu_gfxoff_residency`` +--------------------------- + +Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop. Read it to +get average GFXOFF residency % multiplied by 100 during the last logging +interval. E.g. a value of 7854 means 78.54% of the time in the last logging +interval the GPU was in GFXOFF mode. *Only supported in vangogh* From a86707ae6c581f3f1826a9675a892144176aa312 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Wed, 10 Aug 2022 20:28:58 -0300 Subject: [PATCH 47/56] drm/amdgpu: Document gfx_off members of struct amdgpu_gfx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comments to document gfx_off related members of struct amdgpu_gfx. Signed-off-by: André Almeida Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 1f4481c13c58a..027e993ff45ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -343,12 +343,12 @@ struct amdgpu_gfx { uint32_t srbm_soft_reset; /* gfx off */ - bool gfx_off_state; /* true: enabled, false: disabled */ - struct mutex gfx_off_mutex; - uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */ - struct delayed_work gfx_off_delay_work; - uint32_t gfx_off_residency; - uint64_t gfx_off_entrycount; + bool gfx_off_state; /* true: enabled, false: disabled */ + struct mutex gfx_off_mutex; /* mutex to change gfxoff state */ + uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */ + struct delayed_work gfx_off_delay_work; /* async work to set gfx block off */ + uint32_t gfx_off_residency; /* last logged residency */ + uint64_t gfx_off_entrycount; /* number of times the GPU has entered GFXOFF state */ /* pipe reservation */ struct mutex pipe_reserve_mutex; From 1aad18ce5e79f981e4bc56584be07e430e5006f0 Mon Sep 17 00:00:00 2001 From: Khalid Masum Date: Tue, 16 Aug 2022 00:34:25 +0600 Subject: [PATCH 48/56] drm/amdgpu/vcn: Return void from the stop_dbg_mode There is no point in returning an int here. It only returns 0, which the caller never uses. Therefore return void and remove the unnecessary assignment.
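For illustration (a generic sketch, not the VCN sources): when a function can only ever return 0 and its caller ignores the result, changing the return type to void removes dead code and makes the contract explicit:

/* before: the return value carries no information */
static int stop_dbg_mode_old(void)
{
	/* ... program the hardware ... */
	return 0;
}

/* after: the signature says there is nothing for the caller to check */
static void stop_dbg_mode_new(void)
{
	/* ... program the hardware ... */
}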
Addresses-Coverity: 1504988 ("Unused value") Fixes: 8da1170a16e4 ("drm/amdgpu: add VCN4 ip block support") Reviewed-by: Ruijing Dong Suggested-by: Ruijing Dong Suggested-by: Greg Kroah-Hartman Signed-off-by: Khalid Masum Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index ca14c3ef742ec..fb2d74f304481 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1115,7 +1115,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) * * Stop VCN block with dpg mode */ -static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { uint32_t tmp; @@ -1133,7 +1133,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); - return 0; } /** @@ -1154,7 +1153,7 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v4_0_stop_dpg_mode(adev, i); + vcn_v4_0_stop_dpg_mode(adev, i); continue; } From fb0f439cbecc762fed1df43bfa48d3b4b26c69ec Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 4 Aug 2022 14:42:47 -0400 Subject: [PATCH 49/56] drm/amd/display: Include scaling factor for SubVP command [Description] For SubVP scaling cases, we must include the scaling info as part of the cmd. This is required when converting OTG line to HUBP line for the MALL_START_LINE programming. Reviewed-by: Jun Lei Acked-by: Brian Chang Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler --- .../drm/amd/display/dc/basics/conversion.c | 21 +++++++++++++++++++ .../drm/amd/display/dc/basics/conversion.h | 3 +++ drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 11 ++++++++++ .../amd/display/dc/dcn321/dcn321_resource.c | 2 +- 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c index 6767fab55c260..352e9afb85c6d 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c +++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c @@ -100,3 +100,24 @@ void convert_float_matrix( matrix[i] = (uint16_t)reg_value; } } + +static uint32_t find_gcd(uint32_t a, uint32_t b) +{ + uint32_t remainder = 0; + while (b != 0) { + remainder = a % b; + a = b; + b = remainder; + } + return a; +} + +void reduce_fraction(uint32_t num, uint32_t den, + uint32_t *out_num, uint32_t *out_den) +{ + uint32_t gcd = 0; + + gcd = find_gcd(num, den); + *out_num = num / gcd; + *out_den = den / gcd; +} diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.h b/drivers/gpu/drm/amd/display/dc/basics/conversion.h index ade785c4fdc7d..81da4e6f7a1ac 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/conversion.h +++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.h @@ -38,6 +38,9 @@ void convert_float_matrix( struct fixed31_32 *flt, uint32_t buffer_size); +void reduce_fraction(uint32_t num, uint32_t den, + uint32_t *out_num, uint32_t *out_den); + static inline unsigned int log_2(unsigned int num) { return ilog2(num); diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index c8059c28ac494..09b304507badb 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -29,6 +29,7 @@ #include "dm_helpers.h" #include "dc_hw_types.h" #include "core_types.h" +#include "../basics/conversion.h" #define CTX dc_dmub_srv->ctx #define DC_LOGGER CTX->logger @@ -600,6 +601,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index]; struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing; struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + uint32_t out_num, out_den; pipe_data->mode = SUBVP; pipe_data->pipe_config.subvp_data.pix_clk_100hz = subvp_pipe->stream->timing.pix_clk_100hz; @@ -613,6 +615,15 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->pipe_idx; pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param; + /* Calculate the scaling factor from the src and dst height. + * e.g. If 3840x2160 being downscaled to 1920x1080, the scaling factor is 1/2. + * Reduce the fraction 1080/2160 = 1/2 for the "scaling factor" + */ + reduce_fraction(subvp_pipe->stream->src.height, subvp_pipe->stream->dst.height, &out_num, &out_den); + // TODO: Uncomment below lines once DMCUB include headers are promoted + //pipe_data->pipe_config.subvp_data.scale_factor_numerator = out_num; + //pipe_data->pipe_config.subvp_data.scale_factor_denominator = out_den; + // Prefetch lines is equal to VACTIVE + BP + VSYNC pipe_data->pipe_config.subvp_data.prefetch_lines = phantom_timing->v_total - phantom_timing->v_front_porch; diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index ef1eee2e54d9a..795d6fb0eaa97 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -1669,7 +1669,7 @@ static bool dcn321_resource_construct( dc->caps.subvp_prefetch_end_to_mall_start_us = 15; dc->caps.subvp_swath_height_margin_lines = 16; dc->caps.subvp_pstate_allow_width_us = 20; - + dc->caps.subvp_vertical_int_margin_us = 30; dc->caps.max_slave_planes = 1; dc->caps.max_slave_yuv_planes = 1; dc->caps.max_slave_rgb_planes = 1; From 836b994d93b96183274dcadd1f47b567dac55814 Mon Sep 17 00:00:00 2001 From: Shane Xiao Date: Mon, 15 Aug 2022 16:32:15 +0800 Subject: [PATCH 50/56] drm/amdgpu: Add secure display TA load for Renoir Add secure display TA load for Renoir Signed-off-by: Shane Xiao Reviewed-by: Aaron Liu --- drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index a2588200ea580..0b2ac418e4ac4 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -101,6 +101,16 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) adev->psp.dtm_context.context.bin_desc.start_addr = (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr + le32_to_cpu(ta_hdr->dtm.offset_bytes); + + if (adev->apu_flags & AMD_APU_IS_RENOIR) { + adev->psp.securedisplay_context.context.bin_desc.fw_version = + le32_to_cpu(ta_hdr->securedisplay.fw_version); + adev->psp.securedisplay_context.context.bin_desc.size_bytes = + le32_to_cpu(ta_hdr->securedisplay.size_bytes); + adev->psp.securedisplay_context.context.bin_desc.start_addr = + (uint8_t 
*)adev->psp.hdcp_context.context.bin_desc.start_addr + + le32_to_cpu(ta_hdr->securedisplay.offset_bytes); + } } return 0; From 514ceec053ce032e1a77bd553c894d4271b75867 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 12 Aug 2022 15:23:51 -0400 Subject: [PATCH 51/56] drm/amdgpu: Fix interrupt handling on ih_soft ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no backing hardware registers for ih_soft ring. As a result, don't try to access hardware registers for read and write pointers when processing interrupts on the IH soft ring. Signed-off-by: Mukul Joshi Acked-by: Christian König Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 7 ++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 4b5396d3e60f6..eec13cb5bf758 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -409,9 +409,11 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. */ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -483,6 +485,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index cdd599a081258..03b7066471f9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -334,9 +334,11 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. */ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -409,6 +411,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 3b4eb8285943c..2022ffbb8dba5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -385,9 +385,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. 
*/ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -461,6 +463,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; From 35d3bc81ff021e87b1fa99be0f5d57fa93cfed0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 15 Aug 2022 08:39:31 -0300 Subject: [PATCH 52/56] drm/amdgpu: Fix use-after-free on amdgpu_bo_list mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If amdgpu_cs_vm_handling returns r != 0, then it will unlock the bo_list_mutex inside the function amdgpu_cs_vm_handling and again on amdgpu_cs_parser_fini. This problem results in the following use-after-free problem: [ 220.280990] ------------[ cut here ]------------ [ 220.281000] refcount_t: underflow; use-after-free. [ 220.281019] WARNING: CPU: 1 PID: 3746 at lib/refcount.c:28 refcount_warn_saturate+0xba/0x110 [ 220.281029] ------------[ cut here ]------------ [ 220.281415] CPU: 1 PID: 3746 Comm: chrome:cs0 Tainted: G W L ------- --- 5.20.0-0.rc0.20220812git7ebfc85e2cd7.10.fc38.x86_64 #1 [ 220.281421] Hardware name: System manufacturer System Product Name/ROG STRIX X570-I GAMING, BIOS 4403 04/27/2022 [ 220.281426] RIP: 0010:refcount_warn_saturate+0xba/0x110 [ 220.281431] Code: 01 01 e8 79 4a 6f 00 0f 0b e9 42 47 a5 00 80 3d de 7e be 01 00 75 85 48 c7 c7 f8 98 8e 98 c6 05 ce 7e be 01 01 e8 56 4a 6f 00 <0f> 0b e9 1f 47 a5 00 80 3d b9 7e be 01 00 0f 85 5e ff ff ff 48 c7 [ 220.281437] RSP: 0018:ffffb4b0d18d7a80 EFLAGS: 00010282 [ 220.281443] RAX: 0000000000000026 RBX: 0000000000000003 RCX: 0000000000000000 [ 220.281448] RDX: 0000000000000001 RSI: ffffffff988d06dc RDI: 00000000ffffffff [ 220.281452] RBP: 00000000ffffffff R08: 0000000000000000 R09: ffffb4b0d18d7930 [ 220.281457] R10: 0000000000000003 R11: ffffa0672e2fffe8 R12: ffffa058ca360400 [ 220.281461] R13: ffffa05846c50a18 R14: 00000000fffffe00 R15: 0000000000000003 [ 220.281465] FS: 00007f82683e06c0(0000) GS:ffffa066e2e00000(0000) knlGS:0000000000000000 [ 220.281470] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 220.281475] CR2: 00003590005cc000 CR3: 00000001fca46000 CR4: 0000000000350ee0 [ 220.281480] Call Trace: [ 220.281485] [ 220.281490] amdgpu_cs_ioctl+0x4e2/0x2070 [amdgpu] [ 220.281806] ? amdgpu_cs_find_mapping+0xe0/0xe0 [amdgpu] [ 220.282028] drm_ioctl_kernel+0xa4/0x150 [ 220.282043] drm_ioctl+0x21f/0x420 [ 220.282053] ? amdgpu_cs_find_mapping+0xe0/0xe0 [amdgpu] [ 220.282275] ? lock_release+0x14f/0x460 [ 220.282282] ? _raw_spin_unlock_irqrestore+0x30/0x60 [ 220.282290] ? _raw_spin_unlock_irqrestore+0x30/0x60 [ 220.282297] ? lockdep_hardirqs_on+0x7d/0x100 [ 220.282305] ? _raw_spin_unlock_irqrestore+0x40/0x60 [ 220.282317] amdgpu_drm_ioctl+0x4a/0x80 [amdgpu] [ 220.282534] __x64_sys_ioctl+0x90/0xd0 [ 220.282545] do_syscall_64+0x5b/0x80 [ 220.282551] ? futex_wake+0x6c/0x150 [ 220.282568] ? lock_is_held_type+0xe8/0x140 [ 220.282580] ? do_syscall_64+0x67/0x80 [ 220.282585] ? lockdep_hardirqs_on+0x7d/0x100 [ 220.282592] ? do_syscall_64+0x67/0x80 [ 220.282597] ? do_syscall_64+0x67/0x80 [ 220.282602] ? 
lockdep_hardirqs_on+0x7d/0x100 [ 220.282609] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 220.282616] RIP: 0033:0x7f8282a4f8bf [ 220.282639] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 18 48 8b 44 24 18 64 48 2b 04 25 28 00 00 [ 220.282644] RSP: 002b:00007f82683df410 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 220.282651] RAX: ffffffffffffffda RBX: 00007f82683df588 RCX: 00007f8282a4f8bf [ 220.282655] RDX: 00007f82683df4d0 RSI: 00000000c0186444 RDI: 0000000000000018 [ 220.282659] RBP: 00007f82683df4d0 R08: 00007f82683df5e0 R09: 00007f82683df4b0 [ 220.282663] R10: 00001d04000a0600 R11: 0000000000000246 R12: 00000000c0186444 [ 220.282667] R13: 0000000000000018 R14: 00007f82683df588 R15: 0000000000000003 [ 220.282689] [ 220.282693] irq event stamp: 6232311 [ 220.282697] hardirqs last enabled at (6232319): [] __up_console_sem+0x5e/0x70 [ 220.282704] hardirqs last disabled at (6232326): [] __up_console_sem+0x43/0x70 [ 220.282709] softirqs last enabled at (6232072): [] __irq_exit_rcu+0xf9/0x170 [ 220.282716] softirqs last disabled at (6232061): [] __irq_exit_rcu+0xf9/0x170 [ 220.282722] ---[ end trace 0000000000000000 ]--- Therefore, remove the mutex_unlock from the amdgpu_cs_vm_handling function, so that amdgpu_cs_submit and amdgpu_cs_parser_fini can handle the unlock. Fixes: 90af0ca047f3 ("drm/amdgpu: Protect the amdgpu_bo_list list with a mutex v2") Reported-by: Mikhail Gavrilov Reviewed-by: Christian König Reviewed-by: Melissa Wen Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6ba5e8ac5e893..0db1aa66a4437 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -957,16 +957,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) { - mutex_unlock(&p->bo_list->bo_list_mutex); + if (r) return r; - } r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) { - mutex_unlock(&p->bo_list->bo_list_mutex); + if (r) return r; - } } r = amdgpu_vm_handle_moved(adev, vm); From 523273b4dd27dc86e14b49d7fe977b3b42d0c89c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 16 Aug 2022 10:44:10 +0200 Subject: [PATCH 53/56] Revert "drm/amd/amdgpu: add pipe1 hardware support" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 4c7631800e6bf0eced08dd7b4f793fcd972f597d. Triggered GFX hangs with GNOME Wayland on Navi 21. 
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2117 Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f61d7c7c9d262..8992e60c90b22 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4846,7 +4846,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 2; + adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; From cdaecc37a3cb35816d81caab6ea2a7aa47b56d6a Mon Sep 17 00:00:00 2001 From: Leslie Shi Date: Thu, 18 Aug 2022 10:08:08 +0800 Subject: [PATCH 54/56] Revert "drm/amdkcl: fix build error" This reverts commit 33ece8ccc5ae36685f20ecdd8e65d876d52aae4e. Signed-off-by: Leslie Shi Reviewed-by: Asher Song --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 10 ++-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 50 ++++++------------- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +- 4 files changed, 23 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 540178d166929..2b60e7da37250 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -164,7 +164,7 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, enum TLB_FLUSH_TYPE flush_type); -int amdgpu_amdkfd_copy_mem_to_mem(struct amdgpu_device *adev, struct kgd_mem *src_mem, +int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem, uint64_t src_offset, struct kgd_mem *dst_mem, uint64_t dest_offset, uint64_t size, struct dma_fence **f, uint64_t *actual_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7f02641d3469e..0486f2fe56cd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -3250,11 +3250,12 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) return 0; } -int amdgpu_amdkfd_copy_mem_to_mem(struct amdgpu_device *adev, struct kgd_mem *src_mem, +int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem, uint64_t src_offset, struct kgd_mem *dst_mem, uint64_t dst_offset, uint64_t size, struct dma_fence **f, uint64_t *actual_size) { + struct amdgpu_device *adev = NULL; struct amdgpu_copy_mem src, dst; struct ww_acquire_ctx ticket; struct list_head list, duplicates; @@ -3262,18 +3263,19 @@ int amdgpu_amdkfd_copy_mem_to_mem(struct amdgpu_device *adev, struct kgd_mem *sr struct dma_fence *fence = NULL; int i, r; - if (!adev|| !src_mem || !dst_mem || !actual_size) + if (!kgd || !src_mem || !dst_mem || !actual_size) return -EINVAL; *actual_size = 0; + adev = get_amdgpu_device(kgd); INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&duplicates); src.bo = &src_mem->bo->tbo; dst.bo = &dst_mem->bo->tbo; - src.mem = src.bo->resource; - dst.mem = dst.bo->resource; + src.mem = &src.bo->mem; + dst.mem = &dst.bo->mem; src.offset = src_offset; dst.offset = dst_offset; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 
d7fb67b63f4bf..5c14e91d3f89f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1698,7 +1698,7 @@ static int kfd_create_sg_table_from_userptr_bo(struct kfd_bo *bo, flags = FOLL_WRITE; locked = 1; mmap_read_lock(mm); - n = kcl_get_user_pages_remote(task, mm, pa, nents, flags, process_pages, + n = get_user_pages_remote(mm, pa, nents, flags, process_pages, NULL, &locked); if (locked) mmap_read_unlock(mm); @@ -1743,13 +1743,11 @@ static void kfd_free_cma_bos(struct cma_iter *ci) list_for_each_entry_safe(cma_bo, tmp, &ci->cma_list, list) { struct kfd_dev *dev = cma_bo->dev; - struct kfd_process_device *pdd; /* sg table is deleted by free_memory_of_gpu */ if (cma_bo->sg) kfd_put_sg_table(cma_bo->sg); - pdd = kfd_get_process_device_data(dev, ci->p); - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, cma_bo->mem, pdd->drm_priv, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, cma_bo->mem, NULL); list_del(&cma_bo->list); kfree(cma_bo); } @@ -1845,10 +1843,9 @@ static int kfd_create_cma_system_bo(struct kfd_dev *kdev, struct kfd_bo *bo, goto pdd_fail; } - ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, 0ULL, bo_size, + ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, 0ULL, bo_size, pdd->drm_priv, cbo->sg, - &cbo->mem, NULL, flags, - false); + &cbo->mem, NULL, flags); mutex_unlock(&p->mutex); if (ret) { pr_err("Failed to create shadow system BO %d\n", ret); @@ -1856,7 +1853,7 @@ static int kfd_create_cma_system_bo(struct kfd_dev *kdev, struct kfd_bo *bo, } if (bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - ret = amdgpu_amdkfd_copy_mem_to_mem(kdev->adev, bo->mem, + ret = amdgpu_amdkfd_copy_mem_to_mem(kdev->kgd, bo->mem, offset, cbo->mem, 0, bo_size, &f, size); if (ret) { @@ -1881,7 +1878,7 @@ static int kfd_create_cma_system_bo(struct kfd_dev *kdev, struct kfd_bo *bo, return ret; copy_fail: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, bo->mem, pdd->drm_priv, NULL); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, bo->mem, NULL); pdd_fail: if (cbo->sg) { kfd_put_sg_table(cbo->sg); @@ -2038,7 +2035,7 @@ static int kfd_copy_userptr_bos(struct cma_iter *si, struct cma_iter *di, nl = min_t(unsigned int, MAX_PP_KMALLOC_COUNT, nents); locked = 1; mmap_read_lock(ri->mm); - nl = kcl_get_user_pages_remote(ri->task, ri->mm, rva, nl, + nl = get_user_pages_remote(ri->mm, rva, nl, flags, process_pages, NULL, &locked); if (locked) @@ -2115,9 +2112,9 @@ static int kfd_create_kgd_mem(struct kfd_dev *kdev, uint64_t size, return -EINVAL; } - ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, 0ULL, size, + ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, 0ULL, size, pdd->drm_priv, NULL, - mem, NULL, flags, false); + mem, NULL, flags); mutex_unlock(&p->mutex); if (ret) { pr_err("Failed to create shadow system BO %d\n", ret); @@ -2129,28 +2126,11 @@ static int kfd_create_kgd_mem(struct kfd_dev *kdev, uint64_t size, static int kfd_destroy_kgd_mem(struct kgd_mem *mem) { - struct amdgpu_device *adev; - struct task_struct *task; - struct kfd_process *p; - struct kfd_process_device *pdd; - uint32_t gpu_id, gpu_idx; - int r; - if (!mem) return -EINVAL; - adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); - task = get_pid_task(mem->process_info->pid, PIDTYPE_PID); - p = kfd_get_process(task); - r = kfd_process_gpuid_from_adev(p, adev, &gpu_id, &gpu_idx); - if (r < 0) { - pr_warn("no gpu id found, mem maybe leaking\n"); - return -EINVAL; - } - pdd = kfd_process_device_from_gpuidx(p, gpu_idx); - /* param adev is not used*/ - 
return amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem, pdd->drm_priv, NULL); + return amdgpu_amdkfd_gpuvm_free_memory_of_gpu(NULL, mem, NULL); } /* Copies @size bytes from si->cur_bo to di->cur_bo starting at their @@ -2207,7 +2187,7 @@ static int kfd_copy_bos(struct cma_iter *si, struct cma_iter *di, dst_mem = di->cma_bo->mem; dst_offset = di->bo_offset & (PAGE_SIZE - 1); list_add_tail(&di->cma_bo->list, &di->cma_list); - } else if (src_bo->dev->adev != dst_bo->dev->adev) { + } else if (src_bo->dev->kgd != dst_bo->dev->kgd) { /* This indicates that atleast on of the BO is in local mem. * If both are in local mem of different devices then create an * intermediate System BO and do a double copy @@ -2228,7 +2208,7 @@ static int kfd_copy_bos(struct cma_iter *si, struct cma_iter *di, return -EINVAL; } - if (amdgpu_amdkfd_copy_mem_to_mem(src_bo->dev->adev, + if (amdgpu_amdkfd_copy_mem_to_mem(src_bo->dev->kgd, src_bo->mem, si->bo_offset, *tmp_mem, 0, size, f, &size)) @@ -2250,7 +2230,7 @@ static int kfd_copy_bos(struct cma_iter *si, struct cma_iter *di, return -EINVAL; } - err = amdgpu_amdkfd_copy_mem_to_mem(dev->adev, src_mem, src_offset, + err = amdgpu_amdkfd_copy_mem_to_mem(dev->kgd, src_mem, src_offset, dst_mem, dst_offset, size, f, copied); /* The tmp_bo allocates additional memory. So it is better to wait and @@ -3447,7 +3427,7 @@ static int criu_restore_memory_of_gpu_ipc(struct kfd_process_device *pdd, */ ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(dev->adev, bo_bucket->addr, bo_bucket->size, pdd->drm_priv, - NULL, kgd_mem, &offset, + kgd_mem, &offset, bo_bucket->alloc_flags, true); if (ret) { pr_err("Could not create the BO\n"); @@ -3538,7 +3518,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, } /* Create the BO */ ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr, - bo_bucket->size, pdd->drm_priv, NULL, kgd_mem, + bo_bucket->size, pdd->drm_priv, kgd_mem, &offset, bo_bucket->alloc_flags, criu_resume); if (ret) { pr_err("Could not create the BO\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 43697b3e4c9c2..99457fb854e65 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -717,7 +717,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, int err; err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size, - pdd->drm_priv, NULL, mem, NULL, + pdd->drm_priv, NULL, &mem, NULL, flags, false); if (err) goto err_alloc_mem; From eba10fa9a1b07c205a657d3f848af55f2e3f1d53 Mon Sep 17 00:00:00 2001 From: Leslie Shi Date: Thu, 18 Aug 2022 10:14:19 +0800 Subject: [PATCH 55/56] Revert "drm/amdkfd: Add CMA API" This reverts commit 9980ee28b7cf743198f3450c8357135baff4fe8b. 
Signed-off-by: Leslie Shi
Reviewed-by: Asher Song
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   7 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  86 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 846 +-----------------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  32 -
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      |   2 +-
 include/uapi/linux/kfd_ioctl.h                |  35 -
 6 files changed, 5 insertions(+), 1003 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2b60e7da37250..4c659178ed4b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -164,11 +164,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
 int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
				uint16_t pasid, enum TLB_FLUSH_TYPE flush_type);
 
-int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem,
-		uint64_t src_offset, struct kgd_mem *dst_mem,
-		uint64_t dest_offset, uint64_t size, struct dma_fence **f,
-		uint64_t *actual_size);
-
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
 int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
 
@@ -292,7 +287,7 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
 size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		struct amdgpu_device *adev, uint64_t va, uint64_t size,
-		void *drm_priv, struct sg_table *sg, struct kgd_mem **mem,
+		void *drm_priv, struct kgd_mem **mem,
		uint64_t *offset, uint32_t flags, bool criu_resume);
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 0486f2fe56cd2..0151a50124d0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1668,11 +1668,12 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		struct amdgpu_device *adev, uint64_t va, uint64_t size,
-		void *drm_priv, struct sg_table *sg, struct kgd_mem **mem,
+		void *drm_priv, struct kgd_mem **mem,
		uint64_t *offset, uint32_t flags, bool criu_resume)
 {
	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
	enum ttm_bo_type bo_type = ttm_bo_type_device;
+	struct sg_table *sg = NULL;
	uint64_t user_addr = 0;
	struct amdgpu_bo *bo;
	struct drm_gem_object *gobj = NULL;
@@ -1713,10 +1714,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		}
	}
 
-	if (sg) {
-		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
-		bo_type = ttm_bo_type_sg;
-	}
	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
	if (!*mem) {
		ret = -ENOMEM;
@@ -3250,85 +3247,6 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
	return 0;
 }
 
-int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem,
-		uint64_t src_offset, struct kgd_mem *dst_mem,
-		uint64_t dst_offset, uint64_t size,
-		struct dma_fence **f, uint64_t *actual_size)
-{
-	struct amdgpu_device *adev = NULL;
-	struct amdgpu_copy_mem src, dst;
-	struct ww_acquire_ctx ticket;
-	struct list_head list, duplicates;
-	struct ttm_validate_buffer resv_list[2];
-	struct dma_fence *fence = NULL;
-	int i, r;
-
-	if (!kgd || !src_mem || !dst_mem || !actual_size)
-		return -EINVAL;
-
-	*actual_size = 0;
-
-	adev = get_amdgpu_device(kgd);
-	INIT_LIST_HEAD(&list);
-	INIT_LIST_HEAD(&duplicates);
-
-	src.bo = &src_mem->bo->tbo;
-	dst.bo = &dst_mem->bo->tbo;
-	src.mem = &src.bo->mem;
-	dst.mem = &dst.bo->mem;
-	src.offset = src_offset;
-	dst.offset = dst_offset;
-
-	resv_list[0].bo = src.bo;
-	resv_list[1].bo = dst.bo;
-
-	for (i = 0; i < 2; i++) {
-		resv_list[i].num_shared = 1;
-		list_add_tail(&resv_list[i].head, &list);
-	}
-
-	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
-	if (r) {
-		pr_err("Copy buffer failed. Unable to reserve bo (%d)\n", r);
-		return r;
-	}
-
-	/* The process to which the Source and Dest BOs belong to could be
-	 * evicted and the BOs invalidated. So validate BOs before use
-	 */
-	r = amdgpu_amdkfd_bo_validate(src_mem->bo, src_mem->domain, false);
-	if (r) {
-		pr_err("CMA fail: SRC BO validate failed %d\n", r);
-		goto validate_fail;
-	}
-
-
-	r = amdgpu_amdkfd_bo_validate(dst_mem->bo, dst_mem->domain, false);
-	if (r) {
-		pr_err("CMA fail: DST BO validate failed %d\n", r);
-		goto validate_fail;
-	}
-
-
-	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size, false, NULL,
-				       &fence);
-	if (r)
-		pr_err("Copy buffer failed %d\n", r);
-	else
-		*actual_size = size;
-	if (fence) {
-		amdgpu_bo_fence(src_mem->bo, fence, true);
-		amdgpu_bo_fence(dst_mem->bo, fence, true);
-	}
-	if (f)
-		*f = dma_fence_get(fence);
-	dma_fence_put(fence);
-
-validate_fail:
-	ttm_eu_backoff_reservation(&ticket, &list);
-	return r;
-}
-
 /* Returns GPU-specific tiling mode information */
 int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
				struct tile_config *config)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 5c14e91d3f89f..f16b7206c858f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1150,7 +1150,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 
	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		dev->adev, args->va_addr, args->size,
-		pdd->drm_priv, NULL, (struct kgd_mem **) &mem, &offset,
+		pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
		flags, false);
 
	if (err)
@@ -1628,847 +1628,6 @@ static int kfd_ioctl_ipc_import_handle(struct file *filep,
	return r;
 }
 
-#ifndef PTRACE_MODE_ATTACH_REALCREDS
-#define PTRACE_MODE_ATTACH_REALCREDS PTRACE_MODE_ATTACH
-#endif
-
-/* Maximum number of entries for process pages array which lives on stack */
-#define MAX_PP_STACK_COUNT 16
-/* Maximum number of pages kmalloc'd to hold struct page's during copy */
-#define MAX_KMALLOC_PAGES (PAGE_SIZE * 2)
-#define MAX_PP_KMALLOC_COUNT (MAX_KMALLOC_PAGES/sizeof(struct page *))
-
-static void kfd_put_sg_table(struct sg_table *sg)
-{
-	unsigned int i;
-	struct scatterlist *s;
-
-	for_each_sg(sg->sgl, s, sg->nents, i)
-		put_page(sg_page(s));
-}
-
-
-/* Create a sg table for the given userptr BO by pinning its system pages
- * @bo: userptr BO
- * @offset: Offset into BO
- * @mm/@task: mm_struct & task_struct of the process that holds the BO
- * @size: in/out: desired size / actual size which could be smaller
- * @sg_size: out: Size of sg table. This is ALIGN_UP(@size)
- * @ret_sg: out sg table
- */
-static int kfd_create_sg_table_from_userptr_bo(struct kfd_bo *bo,
-		int64_t offset, int cma_write,
-		struct mm_struct *mm,
-		struct task_struct *task,
-		uint64_t *size,
-		uint64_t *sg_size,
-		struct sg_table **ret_sg)
-{
-	int ret, locked = 1;
-	struct sg_table *sg = NULL;
-	unsigned int i, offset_in_page, flags = 0;
-	unsigned long nents, n;
-	unsigned long pa = (bo->cpuva + offset) & PAGE_MASK;
-	unsigned int cur_page = 0;
-	struct scatterlist *s;
-	uint64_t sz = *size;
-	struct page **process_pages;
-
-	*sg_size = 0;
-	sg = kmalloc(sizeof(*sg), GFP_KERNEL);
-	if (!sg)
-		return -ENOMEM;
-
-	offset_in_page = offset & (PAGE_SIZE - 1);
-	nents = (sz + offset_in_page + PAGE_SIZE - 1) / PAGE_SIZE;
-
-	ret = sg_alloc_table(sg, nents, GFP_KERNEL);
-	if (unlikely(ret)) {
-		ret = -ENOMEM;
-		goto sg_alloc_fail;
-	}
-	process_pages = kmalloc_array(nents, sizeof(struct pages *),
-				      GFP_KERNEL);
-	if (!process_pages) {
-		ret = -ENOMEM;
-		goto page_alloc_fail;
-	}
-
-	if (cma_write)
-		flags = FOLL_WRITE;
-	locked = 1;
-	mmap_read_lock(mm);
-	n = get_user_pages_remote(mm, pa, nents, flags, process_pages,
-				  NULL, &locked);
-	if (locked)
-		mmap_read_unlock(mm);
-	if (n <= 0) {
-		pr_err("CMA: Invalid virtual address 0x%lx\n", pa);
-		ret = -EFAULT;
-		goto get_user_fail;
-	}
-	if (n != nents) {
-		/* Pages pinned < requested. Set the size accordingly */
-		*size = (n * PAGE_SIZE) - offset_in_page;
-		pr_debug("Requested %lx but pinned %lx\n", nents, n);
-	}
-
-	sz = 0;
-	for_each_sg(sg->sgl, s, n, i) {
-		sg_set_page(s, process_pages[cur_page], PAGE_SIZE,
-			    offset_in_page);
-		sg_dma_address(s) = page_to_phys(process_pages[cur_page]);
-		offset_in_page = 0;
-		cur_page++;
-		sz += PAGE_SIZE;
-	}
-	*ret_sg = sg;
-	*sg_size = sz;
-
-	kfree(process_pages);
-	return 0;
-
-get_user_fail:
-	kfree(process_pages);
-page_alloc_fail:
-	sg_free_table(sg);
-sg_alloc_fail:
-	kfree(sg);
-	return ret;
-}
-
-static void kfd_free_cma_bos(struct cma_iter *ci)
-{
-	struct cma_system_bo *cma_bo, *tmp;
-
-	list_for_each_entry_safe(cma_bo, tmp, &ci->cma_list, list) {
-		struct kfd_dev *dev = cma_bo->dev;
-
-		/* sg table is deleted by free_memory_of_gpu */
-		if (cma_bo->sg)
-			kfd_put_sg_table(cma_bo->sg);
-		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, cma_bo->mem, NULL);
-		list_del(&cma_bo->list);
-		kfree(cma_bo);
-	}
-}
-
-/* 1 second timeout */
-#define CMA_WAIT_TIMEOUT msecs_to_jiffies(1000)
-
-static int kfd_cma_fence_wait(struct dma_fence *f)
-{
-	int ret;
-
-	ret = dma_fence_wait_timeout(f, false, CMA_WAIT_TIMEOUT);
-	if (likely(ret > 0))
-		return 0;
-	if (!ret)
-		ret = -ETIME;
-	return ret;
-}
-
-/* Put previous (old) fence @pf but it waits for @pf to signal if the context
- * of the current fence @cf is different.
- */
-static int kfd_fence_put_wait_if_diff_context(struct dma_fence *cf,
-					      struct dma_fence *pf)
-{
-	int ret = 0;
-
-	if (pf && cf && cf->context != pf->context)
-		ret = kfd_cma_fence_wait(pf);
-	dma_fence_put(pf);
-	return ret;
-}
-
-#define MAX_SYSTEM_BO_SIZE (512*PAGE_SIZE)
-
-/* Create an equivalent system BO for the given @bo. If @bo is a userptr then
- * create a new system BO by pinning underlying system pages of the given
- * userptr BO. If @bo is in Local Memory then create an empty system BO and
- * then copy @bo into this new BO.
- * @bo: Userptr BO or Local Memory BO
- * @offset: Offset into bo
- * @size: in/out: The size of the new BO could be less than requested if all
- *	the pages couldn't be pinned or size > MAX_SYSTEM_BO_SIZE. This would
- *	be reflected in @size
- * @mm/@task: mm/task to which @bo belongs to
- * @cma_bo: out: new system BO
- */
-static int kfd_create_cma_system_bo(struct kfd_dev *kdev, struct kfd_bo *bo,
-				    uint64_t *size, uint64_t offset,
-				    int cma_write, struct kfd_process *p,
-				    struct mm_struct *mm,
-				    struct task_struct *task,
-				    struct cma_system_bo **cma_bo)
-{
-	int ret;
-	struct kfd_process_device *pdd = NULL;
-	struct cma_system_bo *cbo;
-	uint64_t bo_size = 0;
-	struct dma_fence *f;
-
-	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
-			 KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE;
-
-	*cma_bo = NULL;
-	cbo = kzalloc(sizeof(**cma_bo), GFP_KERNEL);
-	if (!cbo)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&cbo->list);
-	if (bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-		bo_size = min_t(uint64_t, *size, MAX_SYSTEM_BO_SIZE);
-	else if (bo->cpuva) {
-		ret = kfd_create_sg_table_from_userptr_bo(bo, offset,
-							  cma_write, mm, task,
-							  size, &bo_size,
-							  &cbo->sg);
-		if (ret) {
-			pr_err("CMA: BO create with sg failed %d\n", ret);
-			goto sg_fail;
-		}
-	} else {
-		WARN_ON(1);
-		ret = -EINVAL;
-		goto sg_fail;
-	}
-	mutex_lock(&p->mutex);
-	pdd = kfd_get_process_device_data(kdev, p);
-	if (!pdd) {
-		mutex_unlock(&p->mutex);
-		pr_err("Process device data doesn't exist\n");
-		ret = -EINVAL;
-		goto pdd_fail;
-	}
-
-	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, 0ULL, bo_size,
-						      pdd->drm_priv, cbo->sg,
-						      &cbo->mem, NULL, flags);
-	mutex_unlock(&p->mutex);
-	if (ret) {
-		pr_err("Failed to create shadow system BO %d\n", ret);
-		goto pdd_fail;
-	}
-
-	if (bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-		ret = amdgpu_amdkfd_copy_mem_to_mem(kdev->kgd, bo->mem,
-						    offset, cbo->mem, 0,
-						    bo_size, &f, size);
-		if (ret) {
-			pr_err("CMA: Intermediate copy failed %d\n", ret);
-			goto copy_fail;
-		}
-
-		/* Wait for the copy to finish as subsequent copy will be done
-		 * by different device
-		 */
-		ret = kfd_cma_fence_wait(f);
-		dma_fence_put(f);
-		if (ret) {
-			pr_err("CMA: Intermediate copy timed out %d\n", ret);
-			goto copy_fail;
-		}
-	}
-
-	cbo->dev = kdev;
-	*cma_bo = cbo;
-
-	return ret;
-
-copy_fail:
-	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, bo->mem, NULL);
-pdd_fail:
-	if (cbo->sg) {
-		kfd_put_sg_table(cbo->sg);
-		sg_free_table(cbo->sg);
-		kfree(cbo->sg);
-	}
-sg_fail:
-	kfree(cbo);
-	return ret;
-}
-
-/* Update cma_iter.cur_bo with KFD BO that is assocaited with
- * cma_iter.array.va_addr
- */
-static int kfd_cma_iter_update_bo(struct cma_iter *ci)
-{
-	struct kfd_memory_range *arr = ci->array;
-	uint64_t va_end = arr->va_addr + arr->size - 1;
-
-	mutex_lock(&ci->p->mutex);
-	ci->cur_bo = kfd_process_find_bo_from_interval(ci->p, arr->va_addr,
-						       va_end);
-	mutex_unlock(&ci->p->mutex);
-
-	if (!ci->cur_bo || va_end > ci->cur_bo->it.last) {
-		pr_err("CMA failed. Range out of bounds\n");
-		return -EFAULT;
-	}
-	return 0;
-}
-
-/* Advance iter by @size bytes. */
-static int kfd_cma_iter_advance(struct cma_iter *ci, unsigned long size)
-{
-	int ret = 0;
-
-	ci->offset += size;
-	if (WARN_ON(size > ci->total || ci->offset > ci->array->size))
-		return -EFAULT;
-	ci->total -= size;
-	/* If current range is copied, move to next range if available. */
-	if (ci->offset == ci->array->size) {
-
-		/* End of all ranges */
-		if (!(--ci->nr_segs))
-			return 0;
-
-		ci->array++;
-		ci->offset = 0;
-		ret = kfd_cma_iter_update_bo(ci);
-		if (ret)
-			return ret;
-	}
-	ci->bo_offset = (ci->array->va_addr + ci->offset) -
-			ci->cur_bo->it.start;
-	return ret;
-}
-
-static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs,
-			     struct kfd_process *p, struct mm_struct *mm,
-			     struct task_struct *task, struct cma_iter *ci)
-{
-	int ret;
-	int nr;
-
-	if (!arr || !segs)
-		return -EINVAL;
-
-	memset(ci, 0, sizeof(*ci));
-	INIT_LIST_HEAD(&ci->cma_list);
-	ci->array = arr;
-	ci->nr_segs = segs;
-	ci->p = p;
-	ci->offset = 0;
-	ci->mm = mm;
-	ci->task = task;
-	for (nr = 0; nr < segs; nr++)
-		ci->total += arr[nr].size;
-
-	/* Valid but size is 0. So copied will also be 0 */
-	if (!ci->total)
-		return 0;
-
-	ret = kfd_cma_iter_update_bo(ci);
-	if (!ret)
-		ci->bo_offset = arr->va_addr - ci->cur_bo->it.start;
-	return ret;
-}
-
-static bool kfd_cma_iter_end(struct cma_iter *ci)
-{
-	if (!(ci->nr_segs) || !(ci->total))
-		return true;
-	return false;
-}
-
-/* Copies @size bytes from si->cur_bo to di->cur_bo BO. The function assumes
- * both source and dest. BOs are userptr BOs. Both BOs can either belong to
- * current process or one of the BOs can belong to a differnt
- * process. @Returns 0 on success, -ve on failure
- *
- * @si: Source iter
- * @di: Dest. iter
- * @cma_write: Indicates if it is write to remote or read from remote
- * @size: amount of bytes to be copied
- * @copied: Return number of bytes actually copied.
- */
-static int kfd_copy_userptr_bos(struct cma_iter *si, struct cma_iter *di,
-				bool cma_write, uint64_t size,
-				uint64_t *copied)
-{
-	int i, ret = 0, locked;
-	unsigned int nents, nl;
-	unsigned int offset_in_page;
-	struct page *pp_stack[MAX_PP_STACK_COUNT];
-	struct page **process_pages = pp_stack;
-	unsigned long rva, lva = 0, flags = 0;
-	uint64_t copy_size, to_copy = size;
-	struct cma_iter *li, *ri;
-
-	if (cma_write) {
-		ri = di;
-		li = si;
-		flags |= FOLL_WRITE;
-	} else {
-		li = di;
-		ri = si;
-	}
-	/* rva: remote virtual address. Page aligned to start page.
-	 * rva + offset_in_page: Points to remote start address
-	 * lva: local virtual address. Points to the start address.
-	 * nents: computes number of remote pages to request
-	 */
-	offset_in_page = ri->bo_offset & (PAGE_SIZE - 1);
-	rva = (ri->cur_bo->cpuva + ri->bo_offset) & PAGE_MASK;
-	lva = li->cur_bo->cpuva + li->bo_offset;
-
-	nents = (size + offset_in_page + PAGE_SIZE - 1) / PAGE_SIZE;
-
-	copy_size = min_t(uint64_t, size, PAGE_SIZE - offset_in_page);
-	*copied = 0;
-
-	if (nents > MAX_PP_STACK_COUNT) {
-		/* For reliability kmalloc only 2 pages worth */
-		process_pages = kmalloc(min_t(size_t, MAX_KMALLOC_PAGES,
-					      sizeof(struct pages *)*nents),
-					GFP_KERNEL);
-
-		if (!process_pages)
-			return -ENOMEM;
-	}
-
-	while (nents && to_copy) {
-		nl = min_t(unsigned int, MAX_PP_KMALLOC_COUNT, nents);
-		locked = 1;
-		mmap_read_lock(ri->mm);
-		nl = get_user_pages_remote(ri->mm, rva, nl,
-					   flags, process_pages, NULL,
-					   &locked);
-		if (locked)
-			mmap_read_unlock(ri->mm);
-		if (nl <= 0) {
-			pr_err("CMA: Invalid virtual address 0x%lx\n", rva);
-			ret = -EFAULT;
-			break;
-		}
-
-		for (i = 0; i < nl; i++) {
-			unsigned int n;
-			void *kaddr = kmap(process_pages[i]);
-
-			if (cma_write) {
-				n = copy_from_user(kaddr+offset_in_page,
-						   (void *)lva, copy_size);
-				set_page_dirty(process_pages[i]);
-			} else {
-				n = copy_to_user((void *)lva,
-						 kaddr+offset_in_page,
-						 copy_size);
-			}
-			kunmap(kaddr);
-			if (n) {
-				ret = -EFAULT;
-				break;
-			}
-			to_copy -= copy_size;
-			if (!to_copy)
-				break;
-			lva += copy_size;
-			rva += (copy_size + offset_in_page);
-			WARN_ONCE(rva & (PAGE_SIZE - 1),
-				  "CMA: Error in remote VA computation");
-			offset_in_page = 0;
-			copy_size = min_t(uint64_t, to_copy, PAGE_SIZE);
-		}
-
-		for (i = 0; i < nl; i++)
-			put_page(process_pages[i]);
-
-		if (ret)
-			break;
-		nents -= nl;
-	}
-
-	if (process_pages != pp_stack)
-		kfree(process_pages);
-
-	*copied = (size - to_copy);
-	return ret;
-
-}
-
-static int kfd_create_kgd_mem(struct kfd_dev *kdev, uint64_t size,
-			      struct kfd_process *p, struct kgd_mem **mem)
-{
-	int ret;
-	struct kfd_process_device *pdd = NULL;
-	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
-			 KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE;
-
-	if (!mem || !size || !p || !kdev)
-		return -EINVAL;
-
-	*mem = NULL;
-
-	mutex_lock(&p->mutex);
-	pdd = kfd_get_process_device_data(kdev, p);
-	if (!pdd) {
-		mutex_unlock(&p->mutex);
-		pr_err("Process device data doesn't exist\n");
-		return -EINVAL;
-	}
-
-	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, 0ULL, size,
-						      pdd->drm_priv, NULL,
-						      mem, NULL, flags);
-	mutex_unlock(&p->mutex);
-	if (ret) {
-		pr_err("Failed to create shadow system BO %d\n", ret);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int kfd_destroy_kgd_mem(struct kgd_mem *mem)
-{
-	if (!mem)
-		return -EINVAL;
-
-	/* param adev is not used*/
-	return amdgpu_amdkfd_gpuvm_free_memory_of_gpu(NULL, mem, NULL);
-}
-
-/* Copies @size bytes from si->cur_bo to di->cur_bo starting at their
- * respective offset.
- * @si: Source iter
- * @di: Dest. iter
- * @cma_write: Indicates if it is write to remote or read from remote
- * @size: amount of bytes to be copied
- * @f: Return the last fence if any
- * @copied: Return number of bytes actually copied.
- */
-static int kfd_copy_bos(struct cma_iter *si, struct cma_iter *di,
-			int cma_write, uint64_t size,
-			struct dma_fence **f, uint64_t *copied,
-			struct kgd_mem **tmp_mem)
-{
-	int err = 0;
-	struct kfd_bo *dst_bo = di->cur_bo, *src_bo = si->cur_bo;
-	uint64_t src_offset = si->bo_offset, dst_offset = di->bo_offset;
-	struct kgd_mem *src_mem = src_bo->mem, *dst_mem = dst_bo->mem;
-	struct kfd_dev *dev = dst_bo->dev;
-	int d2d = 0;
-
-	*copied = 0;
-	if (f)
-		*f = NULL;
-	if (src_bo->cpuva && dst_bo->cpuva)
-		return kfd_copy_userptr_bos(si, di, cma_write, size, copied);
-
-	/* If either source or dest. is userptr, create a shadow system BO
-	 * by using the underlying userptr BO pages. Then use this shadow
-	 * BO for copy. src_offset & dst_offset are adjusted because the new BO
-	 * is only created for the window (offset, size) requested.
-	 * The shadow BO is created on the other device. This means if the
-	 * other BO is a device memory, the copy will be using that device.
-	 * The BOs are stored in cma_list for deferred cleanup. This minimizes
-	 * fence waiting just to the last fence.
-	 */
-	if (src_bo->cpuva) {
-		dev = dst_bo->dev;
-		err = kfd_create_cma_system_bo(dev, src_bo, &size,
-					       si->bo_offset, cma_write,
-					       si->p, si->mm, si->task,
-					       &si->cma_bo);
-		src_mem = si->cma_bo->mem;
-		src_offset = si->bo_offset & (PAGE_SIZE - 1);
-		list_add_tail(&si->cma_bo->list, &si->cma_list);
-	} else if (dst_bo->cpuva) {
-		dev = src_bo->dev;
-		err = kfd_create_cma_system_bo(dev, dst_bo, &size,
-					       di->bo_offset, cma_write,
-					       di->p, di->mm, di->task,
-					       &di->cma_bo);
-		dst_mem = di->cma_bo->mem;
-		dst_offset = di->bo_offset & (PAGE_SIZE - 1);
-		list_add_tail(&di->cma_bo->list, &di->cma_list);
-	} else if (src_bo->dev->kgd != dst_bo->dev->kgd) {
-		/* This indicates that atleast on of the BO is in local mem.
-		 * If both are in local mem of different devices then create an
-		 * intermediate System BO and do a double copy
-		 * [VRAM]--gpu1-->[System BO]--gpu2-->[VRAM].
-		 * If only one BO is in VRAM then use that GPU to do the copy
-		 */
-		if (src_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM &&
-		    dst_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-			dev = dst_bo->dev;
-			size = min_t(uint64_t, size, MAX_SYSTEM_BO_SIZE);
-			d2d = 1;
-
-			if (*tmp_mem == NULL) {
-				if (kfd_create_kgd_mem(src_bo->dev,
-						       MAX_SYSTEM_BO_SIZE,
-						       si->p,
-						       tmp_mem))
-					return -EINVAL;
-			}
-
-			if (amdgpu_amdkfd_copy_mem_to_mem(src_bo->dev->kgd,
-					src_bo->mem, si->bo_offset,
-					*tmp_mem, 0,
-					size, f, &size))
-				/* tmp_mem will be freed in caller.*/
-				return -EINVAL;
-
-			kfd_cma_fence_wait(*f);
-			dma_fence_put(*f);
-
-			src_mem = *tmp_mem;
-			src_offset = 0;
-		} else if (src_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-			dev = src_bo->dev;
-		/* else already set to dst_bo->dev */
-	}
-
-	if (err) {
-		pr_err("Failed to create system BO %d", err);
-		return -EINVAL;
-	}
-
-	err = amdgpu_amdkfd_copy_mem_to_mem(dev->kgd, src_mem, src_offset,
-					    dst_mem, dst_offset, size, f,
-					    copied);
-	/* The tmp_bo allocates additional memory. So it is better to wait and
-	 * delete. Also since multiple GPUs are involved the copies are
-	 * currently not pipelined.
-	 */
-	if (*tmp_mem && d2d) {
-		if (!err) {
-			kfd_cma_fence_wait(*f);
-			dma_fence_put(*f);
-			*f = NULL;
-		}
-	}
-	return err;
-}
-
-/* Copy single range from source iterator @si to destination iterator @di.
- * @si will move to next range and @di will move by bytes copied.
- * @return : 0 for success or -ve for failure
- * @f: The last fence if any
- * @copied: out: number of bytes copied
- */
-static int kfd_copy_single_range(struct cma_iter *si, struct cma_iter *di,
-				 bool cma_write, struct dma_fence **f,
-				 uint64_t *copied, struct kgd_mem **tmp_mem)
-{
-	int err = 0;
-	uint64_t copy_size, n;
-	uint64_t size = si->array->size;
-	struct kfd_bo *src_bo = si->cur_bo;
-	struct dma_fence *lfence = NULL;
-
-	if (!src_bo || !di || !copied)
-		return -EINVAL;
-	*copied = 0;
-	if (f)
-		*f = NULL;
-
-	while (size && !kfd_cma_iter_end(di)) {
-		struct dma_fence *fence = NULL;
-
-		copy_size = min(size, (di->array->size - di->offset));
-
-		err = kfd_copy_bos(si, di, cma_write, copy_size,
-				   &fence, &n, tmp_mem);
-		if (err) {
-			pr_err("CMA %d failed\n", err);
-			break;
-		}
-
-		if (fence) {
-			err = kfd_fence_put_wait_if_diff_context(fence,
-								 lfence);
-			lfence = fence;
-			if (err)
-				break;
-		}
-
-		size -= n;
-		*copied += n;
-		err = kfd_cma_iter_advance(si, n);
-		if (err)
-			break;
-		err = kfd_cma_iter_advance(di, n);
-		if (err)
-			break;
-	}
-
-	if (f)
-		*f = dma_fence_get(lfence);
-	dma_fence_put(lfence);
-
-	return err;
-}
-
-static int kfd_ioctl_cross_memory_copy(struct file *filep,
-				       struct kfd_process *local_p, void *data)
-{
-	struct kfd_ioctl_cross_memory_copy_args *args = data;
-	struct kfd_memory_range *src_array, *dst_array;
-	struct kfd_process *remote_p;
-	struct task_struct *remote_task;
-	struct mm_struct *remote_mm;
-	struct pid *remote_pid;
-	struct dma_fence *lfence = NULL;
-	uint64_t copied = 0, total_copied = 0;
-	struct cma_iter di, si;
-	const char *cma_op;
-	int err = 0;
-	struct kgd_mem *tmp_mem = NULL;
-
-	/* Check parameters */
-	if (args->src_mem_range_array == 0 || args->dst_mem_range_array == 0 ||
-	    args->src_mem_array_size == 0 || args->dst_mem_array_size == 0)
-		return -EINVAL;
-	args->bytes_copied = 0;
-
-	/* Allocate space for source and destination arrays */
-	src_array = kmalloc_array((args->src_mem_array_size +
-				   args->dst_mem_array_size),
-				  sizeof(struct kfd_memory_range),
-				  GFP_KERNEL);
-	if (!src_array)
-		return -ENOMEM;
-	dst_array = &src_array[args->src_mem_array_size];
-
-	if (copy_from_user(src_array, (void __user *)args->src_mem_range_array,
-			   args->src_mem_array_size *
-			   sizeof(struct kfd_memory_range))) {
-		err = -EFAULT;
-		goto copy_from_user_fail;
-	}
-	if (copy_from_user(dst_array, (void __user *)args->dst_mem_range_array,
-			   args->dst_mem_array_size *
-			   sizeof(struct kfd_memory_range))) {
-		err = -EFAULT;
-		goto copy_from_user_fail;
-	}
-
-	/* Get remote process */
-	remote_pid = find_get_pid(args->pid);
-	if (!remote_pid) {
-		pr_err("Cross mem copy failed. Invalid PID %d\n", args->pid);
-		err = -ESRCH;
-		goto copy_from_user_fail;
-	}
-
-	remote_task = get_pid_task(remote_pid, PIDTYPE_PID);
-	if (!remote_pid) {
-		pr_err("Cross mem copy failed. Invalid PID or task died %d\n",
-		       args->pid);
-		err = -ESRCH;
-		goto get_pid_task_fail;
-	}
-
-	/* Check access permission */
-	remote_mm = mm_access(remote_task, PTRACE_MODE_ATTACH_REALCREDS);
-	if (!remote_mm || IS_ERR(remote_mm)) {
-		err = IS_ERR(remote_mm) ? PTR_ERR(remote_mm) : -ESRCH;
-		if (err == -EACCES) {
-			pr_err("Cross mem copy failed. Permission error\n");
-			err = -EPERM;
-		} else
-			pr_err("Cross mem copy failed. Invalid task %d\n",
-			       err);
-		goto mm_access_fail;
-	}
-
-	remote_p = kfd_get_process(remote_task);
-	if (IS_ERR(remote_p)) {
-		pr_err("Cross mem copy failed. Invalid kfd process %d\n",
-		       args->pid);
-		err = -EINVAL;
-		goto kfd_process_fail;
-	}
-	/* Initialise cma_iter si & @di with source & destination range. */
-	if (KFD_IS_CROSS_MEMORY_WRITE(args->flags)) {
-		cma_op = "WRITE";
-		pr_debug("CMA WRITE: local -> remote\n");
-		err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size,
-					remote_p, remote_mm, remote_task, &di);
-		if (err)
-			goto kfd_process_fail;
-		err = kfd_cma_iter_init(src_array, args->src_mem_array_size,
-					local_p, current->mm, current, &si);
-		if (err)
-			goto kfd_process_fail;
-	} else {
-		cma_op = "READ";
-		pr_debug("CMA READ: remote -> local\n");
-
-		err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size,
-					local_p, current->mm, current, &di);
-		if (err)
-			goto kfd_process_fail;
-		err = kfd_cma_iter_init(src_array, args->src_mem_array_size,
-					remote_p, remote_mm, remote_task, &si);
-		if (err)
-			goto kfd_process_fail;
-	}
-
-	/* Copy one si range at a time into di. After each call to
-	 * kfd_copy_single_range() si will move to next range. di will be
-	 * incremented by bytes copied
-	 */
-	while (!kfd_cma_iter_end(&si) && !kfd_cma_iter_end(&di)) {
-		struct dma_fence *fence = NULL;
-
-		err = kfd_copy_single_range(&si, &di,
-					KFD_IS_CROSS_MEMORY_WRITE(args->flags),
-					&fence, &copied, &tmp_mem);
-		total_copied += copied;
-
-		if (err)
-			break;
-
-		/* Release old fence if a later fence is created. If no
-		 * new fence is created, then keep the preivous fence
-		 */
-		if (fence) {
-			err = kfd_fence_put_wait_if_diff_context(fence,
-								 lfence);
-			lfence = fence;
-			if (err)
-				break;
-		}
-	}
-
-	/* Wait for the last fence irrespective of error condition */
-	if (lfence) {
-		err = kfd_cma_fence_wait(lfence);
-		dma_fence_put(lfence);
-		if (err)
-			pr_err("CMA %s failed. BO timed out\n", cma_op);
-	}
-
-	if (tmp_mem)
-		kfd_destroy_kgd_mem(tmp_mem);
-
-	kfd_free_cma_bos(&si);
-	kfd_free_cma_bos(&di);
-
-kfd_process_fail:
-	mmput(remote_mm);
-mm_access_fail:
-	put_task_struct(remote_task);
-get_pid_task_fail:
-	put_pid(remote_pid);
-copy_from_user_fail:
-	kfree(src_array);
-
-	/* An error could happen after partial copy. In that case this will
-	 * reflect partial amount of bytes copied
-	 */
-	args->bytes_copied = total_copied;
-	return err;
-}
-
 static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
					struct kfd_process *p, void *data)
 {
@@ -4066,9 +3225,6 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IPC_EXPORT_HANDLE,
			kfd_ioctl_ipc_export_handle, 0),
 
-	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CROSS_MEMORY_COPY,
-			kfd_ioctl_cross_memory_copy, 0),
-
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP,
			kfd_ioctl_dbg_set_debug_trap, 0),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index db2a8a070b695..182eb67edbc52 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -383,38 +383,6 @@ struct kfd_bo {
	unsigned int mem_type;
 };
 
-struct cma_system_bo {
-	struct kgd_mem *mem;
-	struct sg_table *sg;
-	struct kfd_dev *dev;
-	struct list_head list;
-};
-
-/* Similar to iov_iter */
-struct cma_iter {
-	/* points to current entry of range array */
-	struct kfd_memory_range *array;
-	/* total number of entries in the initial array */
-	unsigned long nr_segs;
-	/* total amount of data pointed by kfd array*/
-	unsigned long total;
-	/* offset into the entry pointed by cma_iter.array */
-	unsigned long offset;
-	struct kfd_process *p;
-	struct mm_struct *mm;
-	struct task_struct *task;
-	/* current kfd_bo associated with cma_iter.array.va_addr */
-	struct kfd_bo *cur_bo;
-	/* offset w.r.t cur_bo */
-	unsigned long bo_offset;
-	/* If cur_bo is a userptr BO, then a shadow system BO is created
-	 * using its underlying pages. cma_bo holds this BO. cma_list is a
-	 * list cma_bos created in one session
-	 */
-	struct cma_system_bo *cma_bo;
-	struct list_head cma_list;
-};
-
 enum kfd_mempool {
	KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
	KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 99457fb854e65..5198dd636765a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -717,7 +717,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
	int err;
 
	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
-						      pdd->drm_priv, NULL, &mem, NULL,
+						      pdd->drm_priv, mem, NULL,
						      flags, false);
	if (err)
		goto err_alloc_mem;
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index d247f1e8550a2..b9b688183f1db 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -1076,37 +1076,6 @@ struct kfd_ioctl_ipc_import_handle_args {
	__u32 flags;		/* from KFD */
 };
 
-struct kfd_memory_range {
-	__u64 va_addr;
-	__u64 size;
-};
-
-/* flags definitions
- * BIT0: 0: read operation, 1: write operation.
- * This also identifies if the src or dst array belongs to remote process
- */
-#define KFD_CROSS_MEMORY_RW_BIT	(1 << 0)
-#define KFD_SET_CROSS_MEMORY_READ(flags) (flags &= ~KFD_CROSS_MEMORY_RW_BIT)
-#define KFD_SET_CROSS_MEMORY_WRITE(flags) (flags |= KFD_CROSS_MEMORY_RW_BIT)
-#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT)
-
-struct kfd_ioctl_cross_memory_copy_args {
-	/* to KFD: Process ID of the remote process */
-	__u32 pid;
-	/* to KFD: See above definition */
-	__u32 flags;
-	/* to KFD: Source GPU VM range */
-	__u64 src_mem_range_array;
-	/* to KFD: Size of above array */
-	__u64 src_mem_array_size;
-	/* to KFD: Destination GPU VM range */
-	__u64 dst_mem_range_array;
-	/* to KFD: Size of above array */
-	__u64 dst_mem_array_size;
-	/* from KFD: Total amount of bytes copied */
-	__u64 bytes_copied;
-};
-
 /* Guarantee host access to memory */
 #define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001
 /* Fine grained coherency between all devices with access */
@@ -1395,9 +1364,6 @@ struct kfd_ioctl_set_xnack_mode_args {
 #define AMDKFD_IOC_DBG_TRAP		\
		AMDKFD_IOWR(0x82, struct kfd_ioctl_dbg_trap_args)
 
-#define AMDKFD_IOC_CROSS_MEMORY_COPY	\
-		AMDKFD_IOWR(0x83, struct kfd_ioctl_cross_memory_copy_args)
-
 #define AMDKFD_IOC_RLC_SPM		\
		AMDKFD_IOWR(0x84, struct kfd_ioctl_spm_args)
 
@@ -1405,5 +1371,4 @@ struct kfd_ioctl_set_xnack_mode_args {
 #define AMDKFD_COMMAND_START_2	0x80
 #define AMDKFD_COMMAND_END_2	0x85
 
-
 #endif

From 2aa3a46e69541435c9eef6454fde9125f9a9c2b0 Mon Sep 17 00:00:00 2001
From: Leslie Shi
Date: Thu, 18 Aug 2022 11:37:05 +0800
Subject: [PATCH 56/56] Revert "Export symbol mm_access"

This reverts commit 2491f5f29bae3458ab3a9ad24316955d490b47c0.

Signed-off-by: Leslie Shi
Reviewed-by: Asher Song
---
 kernel/fork.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 518fba1f3abac..1e9ecaa4f5ac5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1401,7 +1401,6 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
 
	return mm;
 }
-EXPORT_SYMBOL_GPL(mm_access);
 
 static void complete_vfork_done(struct task_struct *tsk)
 {
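
Note for readers carrying out-of-tree callers across this series: the cross-memory-copy patch drops the struct sg_table * argument from amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu() and removes the ttm_bo_type_sg path behind it (sg is now always NULL inside the function). The sketch below is illustrative only, not code from the series: my_gtt_alloc() is a hypothetical wrapper, and the flags are example values borrowed from the removed CMA helpers.

/* Hedged sketch: a hypothetical caller updated for the new signature of
 * amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(). Only the argument list change
 * shown in the diffs above is assumed.
 */
static int my_gtt_alloc(struct amdgpu_device *adev, uint64_t gpu_va,
			uint64_t size, void *drm_priv, struct kgd_mem **mem)
{
	uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT |
			 KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE;

	/* Before this series an sg_table pointer sat between drm_priv and
	 * mem, i.e. the call was:
	 *
	 *	amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(adev, gpu_va, size,
	 *			drm_priv, NULL, mem, NULL, flags, false);
	 *
	 * That argument is gone; sg-backed (ttm_bo_type_sg) allocations can
	 * no longer be requested through this interface.
	 */
	return amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(adev, gpu_va, size,
			drm_priv, mem, NULL, flags, false);
}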