From da3b36a23bb72e9742bf2f1b3e5da9615480c789 Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Tue, 23 Nov 2021 19:19:40 +0800 Subject: [PATCH 01/14] drm/amdgpu/sriov/vcn: add new vcn ip revision check case for SIENNA_CICHLID [WHY] for sriov odd# vf will modify vcn0 engine ip revision(due to multimedia bandwidth feature), which will be mismatched with original vcn0 revision [HOW] add new version check for vcn0 disabled revision(3, 0, 192), typically modified under sriov mode Signed-off-by: Jane Jian Reviewed-by: Guchun Chen Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 1 + drivers/gpu/drm/amd/amdgpu/nv.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 503995c7ff6c1..f6fae79203ee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -918,6 +918,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 0, 64): case IP_VERSION(3, 1, 1): case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 0, 192): amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 4f7c70845785a..585961c2f5f27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -135,6 +135,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 64): + case IP_VERSION(3, 0, 192): if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) fw_name = FIRMWARE_SIENNA_CICHLID; else diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index a6659d9ecdd22..2ec1ffb36b1fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -183,6 +183,7 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, switch (adev->ip_versions[UVD_HWIP][0]) { case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 64): + case IP_VERSION(3, 0, 192): if (amdgpu_sriov_vf(adev)) { if (encode) *codecs = &sriov_sc_video_codecs_encode; From 3e467e478ed3a9701bb588d648d6e0ccb82ced09 Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Wed, 24 Nov 2021 10:33:38 +0800 Subject: [PATCH 02/14] drm/amdgpu: cancel the correct hrtimer on exit Signed-off-by: Flora Cui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index ce982afeff913..ac9a8cd21c4b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -504,8 +504,8 @@ static int amdgpu_vkms_sw_fini(void *handle) int i = 0; for (i = 0; i < adev->mode_info.num_crtc; i++) - if (adev->mode_info.crtcs[i]) - hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer); + if (adev->amdgpu_vkms_output[i].vblank_hrtimer.function) + hrtimer_cancel(&adev->amdgpu_vkms_output[i].vblank_hrtimer); kfree(adev->mode_info.bios_hardcoded_edid); kfree(adev->amdgpu_vkms_output); From 1053b9c948e614473819a1a5bcaff6d44e680dcf Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Thu, 18 Nov 2021 16:25:19 +0800 Subject: [PATCH 03/14] drm/amdgpu: check atomic flag to differeniate with legacy path since vkms support atomic KMS interface Signed-off-by: Flora Cui Reviewed-by: Guchun Chen Acked-by: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d94fa748e6bbe..4b7a69ef721f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3833,7 +3833,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) /* disable all interrupts */ amdgpu_irq_disable_all(adev); if (adev->mode_info.mode_config_initialized){ - if (!amdgpu_device_has_dc_support(adev)) + if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev))) drm_helper_force_disable_all(adev_to_drm(adev)); else drm_atomic_helper_shutdown(adev_to_drm(adev)); @@ -5129,7 +5129,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); } - if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) { + if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) { drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); } From 7551f70ab93d0f3371b28e996f7583e3be1d9a72 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Fri, 26 Nov 2021 13:06:15 +0800 Subject: [PATCH 04/14] drm/amdgpu: fix the missed handling for SDMA2 and SDMA3 There is no base reg offset or ip_version set for SDMA2 and SDMA3 on SIENNA_CICHLID, so add them. Signed-off-by: Guchun Chen Reviewed-by: Kevin Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index f6fae79203ee1..ea00090b3fb36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -157,6 +157,8 @@ static int hw_id_map[MAX_HWIP] = { [HDP_HWIP] = HDP_HWID, [SDMA0_HWIP] = SDMA0_HWID, [SDMA1_HWIP] = SDMA1_HWID, + [SDMA2_HWIP] = SDMA2_HWID, + [SDMA3_HWIP] = SDMA3_HWID, [MMHUB_HWIP] = MMHUB_HWID, [ATHUB_HWIP] = ATHUB_HWID, [NBIO_HWIP] = NBIF_HWID, From e0570f0b6e2e88be7ef99d1194b153cb054a2107 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 29 Nov 2021 15:57:44 +0800 Subject: [PATCH 05/14] drm/amdgpu: Don't halt RLC on GFX suspend On aldebaran, RLC also controls GFXCLK. Skip halting RLC during GFX IP suspend and keep it running till PMFW disables all DPMs. [ 578.019986] amdgpu 0000:23:00.0: amdgpu: GPU reset begin! [ 583.245566] amdgpu 0000:23:00.0: amdgpu: Failed to disable smu features. [ 583.245621] amdgpu 0000:23:00.0: amdgpu: Fail to disable dpm features! [ 583.245639] [drm:amdgpu_device_ip_suspend_phase2 [amdgpu]] *ERROR* suspend of IP block failed -62 [ 583.248504] [drm] free PSP TMR buffer Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 ++++--- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 34478bcc4d095..b305fd39874fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4060,9 +4060,10 @@ static int gfx_v9_0_hw_fini(void *handle) gfx_v9_0_cp_enable(adev, false); - /* Skip suspend with A+A reset */ - if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) { - dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n"); + /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */ + if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) || + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) { + dev_dbg(adev->dev, "Skipping RLC halt\n"); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 01168b8955bff..8a3244585d809 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1468,7 +1468,7 @@ static int smu_disable_dpms(struct smu_context *smu) dev_err(adev->dev, "Failed to disable smu features.\n"); } - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0) && + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) && adev->gfx.rlc.funcs->stop) adev->gfx.rlc.funcs->stop(adev); From 94ebc035456a4ccacfbbef60c444079a256623ad Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 12 Nov 2021 15:27:32 -0500 Subject: [PATCH 06/14] drm/amd/display: Allow DSC on supported MST branch devices [Why] When trying to lightup two 4k60 non-DSC displays behind a branch device that supports DSC we can't lightup both at once due to bandwidth limitations - each requires 48 VCPI slots but we only have 63. [How] The workaround already exists in the code but is guarded by a CONFIG that cannot be set by the user and shouldn't need to be. Check for specific branch device IDs to device whether to enable the workaround for multiple display scenarios. Reviewed-by: Hersen Wu Acked-by: Bhawanpreet Lakha Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 32a5ce09a62a9..cc34a35d0bcbf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -36,6 +36,8 @@ #include "dm_helpers.h" #include "dc_link_ddc.h" +#include "ddc_service_types.h" +#include "dpcd_defs.h" #include "i2caux_interface.h" #include "dmub_cmd.h" @@ -157,6 +159,16 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { }; #if defined(CONFIG_DRM_AMD_DC_DCN) +static bool needs_dsc_aux_workaround(struct dc_link *link) +{ + if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 && + (link->dpcd_caps.dpcd_rev.raw == DPCD_REV_14 || link->dpcd_caps.dpcd_rev.raw == DPCD_REV_12) && + link->dpcd_caps.sink_count.bits.SINK_COUNT >= 2) + return true; + + return false; +} + static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector) { struct dc_sink *dc_sink = aconnector->dc_sink; @@ -166,7 +178,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto u8 *dsc_branch_dec_caps = NULL; aconnector->dsc_aux = drm_dp_mst_dsc_aux_for_port(port); -#if defined(CONFIG_HP_HOOK_WORKAROUND) + /* * drm_dp_mst_dsc_aux_for_port() will return NULL for certain configs * because it only check the dsc/fec caps of the "port variable" and not the dock @@ -176,10 +188,10 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto * Workaround: explicitly check the use case above and use the mst dock's aux as dsc_aux * */ - - if (!aconnector->dsc_aux && !port->parent->port_parent) + if (!aconnector->dsc_aux && !port->parent->port_parent && + needs_dsc_aux_workaround(aconnector->dc_link)) aconnector->dsc_aux = &aconnector->mst_port->dm_dp_aux.aux; -#endif + if (!aconnector->dsc_aux) return false; From ef548afe05f8d8c5af0fc44b035d5283156f8b03 Mon Sep 17 00:00:00 2001 From: "Shen, George" Date: Mon, 15 Nov 2021 22:38:18 -0500 Subject: [PATCH 07/14] drm/amd/display: Clear DPCD lane settings after repeater training [Why] VS and PE requested by repeater should not persist for the sink. [How] Clear DPCD lane settings after repeater link training finishes. Reviewed-by: Wesley Chalmers Acked-by: Bhawanpreet Lakha Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index cb7bf9148904e..13bc69d6b6791 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2138,7 +2138,7 @@ static enum link_training_result dp_perform_8b_10b_link_training( } for (lane = 0; lane < (uint8_t)lt_settings->link_settings.lane_count; lane++) - lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET = VOLTAGE_SWING_LEVEL0; + lt_settings->dpcd_lane_settings[lane].raw = 0; } if (status == LINK_TRAINING_SUCCESS) { From 5ceaebcda9061c04f439c93961f0819878365c0f Mon Sep 17 00:00:00 2001 From: Mustapha Ghaddar Date: Mon, 15 Nov 2021 17:56:42 -0500 Subject: [PATCH 08/14] drm/amd/display: Fix for the no Audio bug with Tiled Displays [WHY] It seems like after a series of plug/unplugs we end up in a situation where tiled display doesnt support Audio. [HOW] The issue seems to be related to when we check streams changed after an HPD, we should be checking the audio_struct as well to see if any of its values changed. Reviewed-by: Jun Lei Acked-by: Bhawanpreet Lakha Signed-off-by: Mustapha Ghaddar Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index c32fdccd4d925..dcbfae2104934 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1664,6 +1664,10 @@ bool dc_is_stream_unchanged( if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) return false; + // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks + if (old_stream->audio_info.mode_count != stream->audio_info.mode_count) + return false; + return true; } From c9beecc5c9626ab772160ab3f8e209abc09fa54d Mon Sep 17 00:00:00 2001 From: Jimmy Kizito Date: Sun, 14 Nov 2021 21:48:02 -0500 Subject: [PATCH 09/14] drm/amd/display: Add work around for tunneled MST. [Why] Certain USB4 docks do not seem to be able to handle disabling DSC once it has been enabled on an MST stream. This can result in blank displays. [How] As a work around, always enable DSC on docks exhibiting this issue. The flag to indicate the use of DSC for MST streams on a USB4 dock is set during detection of the dock and only cleared when the USB4 dock is disconnected. Reviewed-by: Jun Lei Reviewed-by: Aric Cyr Acked-by: Bhawanpreet Lakha Signed-off-by: Jimmy Kizito Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 16 +++++++++++++++ .../gpu/drm/amd/display/dc/core/dc_resource.c | 20 +++++++++---------- drivers/gpu/drm/amd/display/dc/dc.h | 3 ++- drivers/gpu/drm/amd/display/dc/dc_link.h | 2 ++ 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 60544788e911e..c8457babfdea4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -758,6 +758,18 @@ static bool detect_dp(struct dc_link *link, dal_ddc_service_set_transaction_type(link->ddc, sink_caps->transaction_type); +#if defined(CONFIG_DRM_AMD_DC_DCN) + /* Apply work around for tunneled MST on certain USB4 docks. Always use DSC if dock + * reports DSC support. + */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && + link->type == dc_connection_mst_branch && + link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 && + link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT && + !link->dc->debug.dpia_debug.bits.disable_mst_dsc_work_around) + link->wa_flags.dpia_mst_dsc_always_on = true; +#endif + #if defined(CONFIG_DRM_AMD_DC_HDCP) /* In case of fallback to SST when topology discovery below fails * HDCP caps will be querried again later by the upper layer (caller @@ -1203,6 +1215,10 @@ static bool dc_link_detect_helper(struct dc_link *link, LINK_INFO("link=%d, mst branch is now Disconnected\n", link->link_index); + /* Disable work around which keeps DSC on for tunneled MST on certain USB4 docks. */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) + link->wa_flags.dpia_mst_dsc_always_on = false; + dm_helpers_dp_mst_stop_top_mgr(link->ctx, link); link->mst_stream_alloc_table.stream_count = 0; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index dcbfae2104934..e2d9a46d0e1ad 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -2256,16 +2256,6 @@ enum dc_status dc_validate_global_state( if (!new_ctx) return DC_ERROR_UNEXPECTED; -#if defined(CONFIG_DRM_AMD_DC_DCN) - - /* - * Update link encoder to stream assignment. - * TODO: Split out reason allocation from validation. - */ - if (dc->res_pool->funcs->link_encs_assign && fast_validate == false) - dc->res_pool->funcs->link_encs_assign( - dc, new_ctx, new_ctx->streams, new_ctx->stream_count); -#endif if (dc->res_pool->funcs->validate_global) { result = dc->res_pool->funcs->validate_global(dc, new_ctx); @@ -2317,6 +2307,16 @@ enum dc_status dc_validate_global_state( if (!dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate)) result = DC_FAIL_BANDWIDTH_VALIDATE; +#if defined(CONFIG_DRM_AMD_DC_DCN) + /* + * Only update link encoder to stream assignment after bandwidth validation passed. + * TODO: Split out assignment and validation. + */ + if (result == DC_OK && dc->res_pool->funcs->link_encs_assign && fast_validate == false) + dc->res_pool->funcs->link_encs_assign( + dc, new_ctx, new_ctx->streams, new_ctx->stream_count); +#endif + return result; } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3aac3f4a28525..618e7989176fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -508,7 +508,8 @@ union dpia_debug_options { uint32_t disable_dpia:1; uint32_t force_non_lttpr:1; uint32_t extend_aux_rd_interval:1; - uint32_t reserved:29; + uint32_t disable_mst_dsc_work_around:1; + uint32_t reserved:28; } bits; uint32_t raw; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 180ecd860296b..b01077a6af0e6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -191,6 +191,8 @@ struct dc_link { bool dp_skip_DID2; bool dp_skip_reset_segment; bool dp_mot_reset_segment; + /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. */ + bool dpia_mst_dsc_always_on; } wa_flags; struct link_mst_stream_allocation_table mst_stream_alloc_table; From fc2c456ea8329053685db179d30e3ff0c91e5066 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 26 Nov 2021 12:42:50 -0500 Subject: [PATCH 10/14] drm/amdkfd: set "r = 0" explicitly before goto To silence the following Smatch static checker warning: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:2615 svm_range_restore_pages() warn: missing error code here? 'get_task_mm()' failed. 'r' = '0' Signed-off-by: Philip Yang Suggested-by: Dan Carpenter Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 58b89b53ebe61..dd4715cb7d420 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2614,6 +2614,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, if (atomic_read(&svms->drain_pagefaults)) { pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + r = 0; goto out; } @@ -2623,6 +2624,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, mm = get_task_mm(p->lead_thread); if (!mm) { pr_debug("svms 0x%p failed to get mm\n", svms); + r = 0; goto out; } @@ -2660,6 +2662,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, if (svm_range_skip_recover(prange)) { amdgpu_gmc_filter_faults_remove(adev, addr, pasid); + r = 0; goto out_unlock_range; } @@ -2668,6 +2671,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) { pr_debug("svms 0x%p [0x%lx %lx] already restored\n", svms, prange->start, prange->last); + r = 0; goto out_unlock_range; } From 494f2e42ce4a9ddffb5d8c5b2db816425ef90397 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 26 Nov 2021 18:43:09 -0500 Subject: [PATCH 11/14] drm/amdkfd: fix double free mem structure drm_gem_object_put calls release_notify callback to free the mem structure and unreserve_mem_limit, move it down after the last access of mem and make it conditional call. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 71a6a9ef54ac7..6348559608ce7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1396,7 +1396,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; - struct drm_gem_object *gobj; + struct drm_gem_object *gobj = NULL; u32 domain, alloc_domain; u64 alloc_flags; int ret; @@ -1506,14 +1506,16 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: - drm_gem_object_put(gobj); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: unreserve_mem_limit(adev, size, alloc_domain, !!sg); err_reserve_limit: mutex_destroy(&(*mem)->lock); - kfree(*mem); + if (gobj) + drm_gem_object_put(gobj); + else + kfree(*mem); err: if (sg) { sg_free_table(sg); From 2da34b7bb59e1caa9a336e0e20a76b8b6a4abea2 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Fri, 19 Nov 2021 04:27:55 -0500 Subject: [PATCH 12/14] drm/amd/display: add connector type check for CRC source set [Why] IGT bypass test will set crc source as DPRX,and display DM didn`t check connection type, it run the test on the HDMI connector ,then the kernel will be crashed because aux->transfer is set null for HDMI connection. This patch will skip the invalid connection test and fix kernel crash issue. [How] Check the connector type while setting the pipe crc source as DPRX or auto,if the type is not DP or eDP, the crtc crc source will not be set and report error code to IGT test,IGT will show the this subtest as no valid crtc/connector combinations found. 116.779714] [IGT] amd_bypass: starting subtest 8bpc-bypass-mode [ 117.730996] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 117.731001] #PF: supervisor instruction fetch in kernel mode [ 117.731003] #PF: error_code(0x0010) - not-present page [ 117.731004] PGD 0 P4D 0 [ 117.731006] Oops: 0010 [#1] SMP NOPTI [ 117.731009] CPU: 11 PID: 2428 Comm: amd_bypass Tainted: G OE 5.11.0-34-generic #36~20.04.1-Ubuntu [ 117.731011] Hardware name: AMD CZN/, BIOS AB.FD 09/07/2021 [ 117.731012] RIP: 0010:0x0 [ 117.731015] Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6. [ 117.731016] RSP: 0018:ffffa8d64225bab8 EFLAGS: 00010246 [ 117.731017] RAX: 0000000000000000 RBX: 0000000000000020 RCX: ffffa8d64225bb5e [ 117.731018] RDX: ffff93151d921880 RSI: ffffa8d64225bac8 RDI: ffff931511a1a9d8 [ 117.731022] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 117.731023] CR2: ffffffffffffffd6 CR3: 000000010d5a4000 CR4: 0000000000750ee0 [ 117.731023] PKRU: 55555554 [ 117.731024] Call Trace: [ 117.731027] drm_dp_dpcd_access+0x72/0x110 [drm_kms_helper] [ 117.731036] drm_dp_dpcd_read+0xb7/0xf0 [drm_kms_helper] [ 117.731040] drm_dp_start_crc+0x38/0xb0 [drm_kms_helper] [ 117.731047] amdgpu_dm_crtc_set_crc_source+0x1ae/0x3e0 [amdgpu] [ 117.731149] crtc_crc_open+0x174/0x220 [drm] [ 117.731162] full_proxy_open+0x168/0x1f0 [ 117.731165] ? open_proxy_open+0x100/0x100 BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1546 Reviewed-by: Harry Wentland Reviewed-by: Rodrigo Siqueira Signed-off-by: Perry Yuan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index cce062adc4391..8a441a22c46ec 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -314,6 +314,14 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) ret = -EINVAL; goto cleanup; } + + if ((aconn->base.connector_type != DRM_MODE_CONNECTOR_DisplayPort) && + (aconn->base.connector_type != DRM_MODE_CONNECTOR_eDP)) { + DRM_DEBUG_DRIVER("No DP connector available for CRC source\n"); + ret = -EINVAL; + goto cleanup; + } + } #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) From 428890a3fec131521cc59aac0d3c48bde9d76b7b Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Mon, 29 Nov 2021 21:29:05 -0500 Subject: [PATCH 13/14] drm/amdgpu: adjust the kfd reset sequence in reset sriov function This change revert previous commits: 9f4f2c1a3524 ("drm/amd/amdgpu: fix the kfd pre_reset sequence in sriov") 271fd38ce56d ("drm/amdgpu: move kfd post_reset out of reset_sriov function") This change moves the amdgpu_amdkfd_pre_reset to an earlier place in amdgpu_device_reset_sriov, presumably to address the sequence issue that the first patch was originally meant to fix. Some register access(GRBM_GFX_CNTL) only be allowed on full access mode. Move kfd_pre_reset and kfd_post_reset back inside reset_sriov function. Fixes: 9f4f2c1a3524 ("drm/amd/amdgpu: fix the kfd pre_reset sequence in sriov") Fixes: 271fd38ce56d ("drm/amdgpu: move kfd post_reset out of reset_sriov function") Signed-off-by: shaoyunl Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4b7a69ef721f9..1e651b9591419 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4289,6 +4289,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, { int r; + amdgpu_amdkfd_pre_reset(adev); + if (from_hypervisor) r = amdgpu_virt_request_full_gpu(adev, true); else @@ -4316,6 +4318,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, amdgpu_irq_gpu_reset_resume_helper(adev); r = amdgpu_ib_ring_tests(adev); + amdgpu_amdkfd_post_reset(adev); error: if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { @@ -5030,7 +5033,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, cancel_delayed_work_sync(&tmp_adev->delayed_init_work); - amdgpu_amdkfd_pre_reset(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_pre_reset(tmp_adev); /* * Mark these ASICs to be reseted as untracked first @@ -5148,9 +5152,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, skip_sched_resume: list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - /* unlock kfd */ - if (!need_emergency_restart) - amdgpu_amdkfd_post_reset(tmp_adev); + /* unlock kfd: SRIOV would do it separately */ + if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_post_reset(tmp_adev); /* kfd_post_reset will do nothing if kfd device is not initialized, * need to bring up kfd here if it's not be initialized before From 3abfe30d803e62cc75dec254eefab3b04d69219b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 29 Nov 2021 12:33:05 -0500 Subject: [PATCH 14/14] drm/amdkfd: process_info lock not needed for svm process_info->lock is used to protect kfd_bo_list, vm_list_head, n_vms and userptr valid/inval list, svm_range_restore_work and svm_range_set_attr don't access those, so do not need to take process_info lock. This will avoid potential circular locking issue. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index dd4715cb7d420..3cb4681c5f539 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1574,7 +1574,6 @@ svm_range_list_lock_and_flush_work(struct svm_range_list *svms, static void svm_range_restore_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); - struct amdkfd_process_info *process_info; struct svm_range_list *svms; struct svm_range *prange; struct kfd_process *p; @@ -1594,12 +1593,10 @@ static void svm_range_restore_work(struct work_struct *work) * the lifetime of this thread, kfd_process and mm will be valid. */ p = container_of(svms, struct kfd_process, svms); - process_info = p->kgd_process_info; mm = p->mm; if (!mm) return; - mutex_lock(&process_info->lock); svm_range_list_lock_and_flush_work(svms, mm); mutex_lock(&svms->lock); @@ -1652,7 +1649,6 @@ static void svm_range_restore_work(struct work_struct *work) out_reschedule: mutex_unlock(&svms->lock); mmap_write_unlock(mm); - mutex_unlock(&process_info->lock); /* If validation failed, reschedule another attempt */ if (evicted_ranges) { @@ -3181,7 +3177,6 @@ static int svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) { - struct amdkfd_process_info *process_info = p->kgd_process_info; struct mm_struct *mm = current->mm; struct list_head update_list; struct list_head insert_list; @@ -3200,8 +3195,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, svms = &p->svms; - mutex_lock(&process_info->lock); - svm_range_list_lock_and_flush_work(svms, mm); r = svm_range_is_valid(p, start, size); @@ -3277,8 +3270,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, mutex_unlock(&svms->lock); mmap_read_unlock(mm); out: - mutex_unlock(&process_info->lock); - pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid, &p->svms, start, start + size - 1, r);