From 573f1fe8ad448a0907b17be59cb2253589750cca Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Mon, 8 Aug 2022 10:06:03 +0800 Subject: [PATCH 01/79] drm/amdgpu: drop compiler guard for pcie_p2p CONFIG_HSA_AMD_P2P is not needed as a build option for pcie_p2p: the p2p feature must always be supported on the dkms branch; otherwise, the in-tree build fails when CONFIG_HSA_AMD_P2P is not defined. Also drop the redundant extern of pcie_p2p in amdgpu.h. Fixes: af428201b20e ("drm/amdgpu: relax the check in amdgpu_device_is_peer_accessible") Signed-off-by: Guchun Chen --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 -- 2 files changed, 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 98db621a053eb..2c4bdc0530cc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -228,9 +228,6 @@ static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS; static const bool __maybe_unused debug_evictions; /* = false */ static const bool __maybe_unused no_system_mem_limit; #endif -#ifdef CONFIG_HSA_AMD_P2P -extern bool pcie_p2p; -#endif extern int amdgpu_tmz; extern int amdgpu_reset_method; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 2f44e8f0bf331..74743245063d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -830,11 +830,9 @@ MODULE_PARM_DESC(keep_idle_process_evicted, "Restore evicted process only if que * DOC: pcie_p2p (bool) * Enable PCIe P2P (requires large-BAR). Default value: true (on) */ -#ifdef CONFIG_HSA_AMD_P2P bool pcie_p2p = true; module_param(pcie_p2p, bool, 0444); MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). 
(N = off, Y = on(default))"); -#endif /** * DOC: dcfeaturemask (uint) From 5569de813c65b06431f730222d1bfe3b1dc425ec Mon Sep 17 00:00:00 2001 From: Leslie Shi Date: Mon, 8 Aug 2022 10:49:08 +0800 Subject: [PATCH 02/79] drm/amdgpu: enable CONFIG_HSA_AMD_P2P when PCI_P2PDMA and DMABUF_MOVENOTIFY are set Suggested-by: Felix Kuehling Signed-off-by: Leslie Shi Reviewed-by: Guchun Chen --- drivers/gpu/drm/amd/dkms/Makefile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/dkms/Makefile b/drivers/gpu/drm/amd/dkms/Makefile index f14bbaa6a7918..91108d5f9b5e0 100644 --- a/drivers/gpu/drm/amd/dkms/Makefile +++ b/drivers/gpu/drm/amd/dkms/Makefile @@ -169,9 +169,11 @@ export CONFIG_DRM_AMD_DC_HDCP=y subdir-ccflags-y += -DCONFIG_DRM_AMD_DC_HDCP endif -ifeq ($(call _is_kcl_macro_defined,HAVE_LINUX_PCI_P2PDMA_H),y) -export CONFIG_HSA_AMD_P2P=y -subdir-ccflags-y += -DCONFIG_HSA_AMD_P2P +ifeq (y,$(CONFIG_PCI_P2PDMA)) + ifeq (y,$(CONFIG_DMABUF_MOVENOTIFY)) + export CONFIG_HSA_AMD_P2P=y + subdir-ccflags-y += -DCONFIG_HSA_AMD_P2P + endif endif # Trying to enable DCN2/3 with core2 optimizations will result in From b4ca9678979b8757c77916bfba55cac938c17a82 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Jul 2022 16:00:56 -0400 Subject: [PATCH 03/79] drm/amd/display: reduce stack size in dcn32 dml (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move additional dummy structures off the stack and into the dummy vars structure. 
Fixes the following: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In function 'DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation': drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1659:1: error: the frame size of 2144 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] 1659 | } | ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In function 'dml32_ModeSupportAndSystemConfigurationFull': drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:3799:1: error: the frame size of 2464 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] 3799 | } // ModeSupportAndSystemConfigurationFull | ^ v2: move more stuff to dummy structure, fix init order (Alex) Acked-by: Christian König Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: Stephen Rothwell --- .../dc/dml/dcn32/display_mode_vba_32.c | 411 ++++++++---------- .../drm/amd/display/dc/dml/display_mode_vba.h | 36 ++ 2 files changed, 217 insertions(+), 230 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index e9204c711cb96..9c2003fbe8fa4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -65,6 +65,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman int iteration; double MaxTotalRDBandwidth; unsigned int NextPrefetchMode; + double MaxTotalRDBandwidthNoUrgentBurst = 0.0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThanMax = false; + double TWait; + double TotalWRBandwidth = 0; + double WRBandwidth = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: --- START ---\n", __func__); @@ -710,11 +716,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman NextPrefetchMode = 
mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]; do { - double MaxTotalRDBandwidthNoUrgentBurst = 0.0; - bool DestinationLineTimesForPrefetchLessThan2 = false; - bool VRatioPrefetchMoreThanMax = false; - double dummy_unit_vector[DC__NUM_DPP__MAX]; - MaxTotalRDBandwidth = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines); @@ -723,41 +724,39 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* NOTE PerfetchMode variable is invalid in DAL as per the input received. * Hence the direction is to use PrefetchModePerState. */ - double TWait = dml32_CalculateTWait( - mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], - mode_lib->vba.UsesMALLForPStateChange[k], - mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - mode_lib->vba.DRRDisplay[k], - mode_lib->vba.DRAMClockChangeLatency, - mode_lib->vba.FCLKChangeLatency, v->UrgentLatency, - mode_lib->vba.SREnterPlusExitTime); - - DmlPipe myPipe; - - myPipe.Dppclk = mode_lib->vba.DPPCLK[k]; - myPipe.Dispclk = mode_lib->vba.DISPCLK; - myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep; - myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; - myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; - myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; - myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; - myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; - myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; - myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; - myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; - myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; - myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; - myPipe.HTotal = mode_lib->vba.HTotal[k]; - myPipe.HActive = mode_lib->vba.HActive[k]; - myPipe.DCCEnable = 
mode_lib->vba.DCCEnable[k]; - myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k]; - myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; - myPipe.BytePerPixelY = v->BytePerPixelY[k]; - myPipe.BytePerPixelC = v->BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; + TWait = dml32_CalculateTWait( + mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.UsesMALLForPStateChange[k], + mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->vba.DRRDisplay[k], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.FCLKChangeLatency, v->UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dppclk = mode_lib->vba.DPPCLK[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dispclk = mode_lib->vba.DISPCLK; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; + 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; + 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, - &myPipe, v->DSCDelay[k], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, @@ -906,8 +905,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #endif { - double dummy_single[1]; - dml32_CalculatePrefetchBandwithSupport( mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, @@ -931,16 +928,14 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* output */ &MaxTotalRDBandwidth, - &dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], &v->PrefetchModeSupported); } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) - dummy_unit_vector[k] = 1.0; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector[k] = 1.0; { - double dummy_single[1]; - bool dummy_boolean[1]; dml32_CalculatePrefetchBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, v->NoUrgentLatencyHidingPre, @@ -954,17 +949,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw_pre, v->prefetch_vmrow_bw, mode_lib->vba.DPPPerPlane, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, + 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, /* output */ - &dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], &v->FractionOfUrgentBandwidth, - &dummy_boolean[0]); + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); } if (VRatioPrefetchMoreThanMax != false || DestinationLineTimesForPrefetchLessThan2 != false) { @@ -1047,8 +1042,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman } { - double dummy_single[2]; - bool dummy_boolean[1]; dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, mode_lib->vba.ImmediateFlipRequirement, @@ -1072,7 +1065,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* output */ &v->total_dcn_read_bw_with_flip, // Single *TotalBandwidth - &dummy_single[0], // Single *FractionOfUrgentBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], // Single *FractionOfUrgentBandwidth &v->ImmediateFlipSupported); // Boolean *ImmediateFlipBandwidthSupport dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, @@ -1089,17 +1082,17 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw_pre, v->prefetch_vmrow_bw, mode_lib->vba.DPPPerPlane, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, /* output */ - &dummy_single[1], // Single *TotalBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], // Single *TotalBandwidth &v->FractionOfUrgentBandwidthImmediateFlip, // Single *FractionOfUrgentBandwidth - &dummy_boolean[0]); // Boolean *ImmediateFlipBandwidthSupport + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); // Boolean *ImmediateFlipBandwidthSupport } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { @@ -1157,22 +1150,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman //Watermarks and NB P-State/DRAM Clock Change Support { - SOCParametersList mmSOCParameters; - enum clock_change_support dummy_dramchange_support; - enum dm_fclock_change_support dummy_fclkchange_support; - bool dummy_USRRetrainingSupport; - - mmSOCParameters.UrgentLatency = v->UrgentLatency; - mmSOCParameters.ExtraLatency = v->UrgentExtraLatency; - 
mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; - mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; - mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; - mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; - mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; - mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; - mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; - mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; - mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.UrgentLatency = v->UrgentLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.ExtraLatency = v->UrgentExtraLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; + 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( mode_lib->vba.USRRetrainingRequiredFinal, @@ -1190,7 +1178,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dpte_group_bytes, v->meta_row_height, v->meta_row_height_chroma, - mmSOCParameters, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters, mode_lib->vba.WritebackChunkSize, mode_lib->vba.SOCCLK, v->DCFCLKDeepSleep, @@ -1227,12 +1215,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* Output */ &v->Watermark, - &dummy_dramchange_support, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support, v->MaxActiveDRAMClockChangeLatencySupported, v->SubViewportLinesNeededInMALL, - &dummy_fclkchange_support, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_fclkchange_support, &v->MinActiveFCLKChangeLatencySupported, - &dummy_USRRetrainingSupport, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_USRRetrainingSupport, mode_lib->vba.ActiveDRAMClockChangeLatencyMargin); /* DCN32 has a new struct Watermarks (typedef) which is used to store @@ -1494,9 +1482,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { //Maximum Bandwidth Used - double TotalWRBandwidth = 0; - double 
WRBandwidth = 0; - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.WritebackEnable[k] == true && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { @@ -1590,9 +1575,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #ifdef __DML_VBA_ALLOW_DELTA__ { - double dummy_single[2]; unsigned int dummy_integer[1]; - bool dummy_boolean[1]; // Calculate z8 stutter eff assuming 0 reserved space dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte, @@ -1645,14 +1628,14 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->meta_row_bw, v->dpte_row_bw, /* Output */ - &dummy_single[0], - &dummy_single[1], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], &dummy_integer[0], &v->Z8StutterEfficiencyNotIncludingVBlankBestCase, &v->Z8StutterEfficiencyBestCase, &v->Z8NumberOfStutterBurstsPerFrameBestCase, &v->StutterPeriodBestCase, - &dummy_boolean[0]); + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); } #else v->Z8StutterEfficiencyNotIncludingVBlankBestCase = v->Z8StutterEfficiencyNotIncludingVBlank; @@ -1668,32 +1651,18 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { - unsigned int dummy_integer[4]; - bool dummy_boolean[2]; - bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; - bool MPCCombineMethodAsPossible; - enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; - unsigned int TotalNumberOfActiveOTG; - unsigned int TotalNumberOfActiveHDMIFRL; - unsigned int TotalNumberOfActiveDP2p0; - unsigned int TotalNumberOfActiveDP2p0Outputs; - unsigned int TotalDSCUnitsRequired; - unsigned int m; - unsigned int 
ReorderingBytes; - bool FullFrameMALLPStateMethod; - bool SubViewportMALLPStateMethod; - bool PhantomPipeMALLPStateMethod; + struct vba_vars_st *v = &mode_lib->vba; + int i, j; + unsigned int k, m; unsigned int MaximumMPCCombine; + unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth; + unsigned int TotalSlots; bool CompBufReservedSpaceNeedAdjustment; bool CompBufReservedSpaceNeedAdjustmentSingleDPP; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: called\n", __func__); #endif - struct vba_vars_st *v = &mode_lib->vba; - - int i, j; - unsigned int k; /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ @@ -1945,7 +1914,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.Read256BlockHeightC, mode_lib->vba.Read256BlockWidthY, mode_lib->vba.Read256BlockWidthC, - dummy_odm_mode, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_odm_mode, mode_lib->vba.BlendingAndTiming, mode_lib->vba.BytePerPixelY, mode_lib->vba.BytePerPixelC, @@ -1973,35 +1942,26 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool *ViewportSizeSupport */ - MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; - MPCCombineMethodAsPossible = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_reduce_voltage_and_clocks) - MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; if (mode_lib->vba.MPCCombineUse[k] == 
dm_mpc_always_when_possible) - MPCCombineMethodAsPossible = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = true; } - mode_lib->vba.MPCCombineMethodIncompatible = MPCCombineMethodAsNeededForPStateChangeAndVoltage - && MPCCombineMethodAsPossible; + mode_lib->vba.MPCCombineMethodIncompatible = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible; for (i = 0; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { - bool NoChroma; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = 0; mode_lib->vba.TotalAvailablePipesSupport[i][j] = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC = dm_odm_combine_mode_disabled; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC = dm_odm_combine_mode_disabled; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { - - bool TotalAvailablePipesSupportNoDSC; - unsigned int NumberOfDPPNoDSC; - enum odm_combine_mode ODMModeNoDSC = dm_odm_combine_mode_disabled; - double RequiredDISPCLKPerSurfaceNoDSC; - bool TotalAvailablePipesSupportDSC; - unsigned int NumberOfDPPDSC; - enum odm_combine_mode ODMModeDSC = dm_odm_combine_mode_disabled; - double RequiredDISPCLKPerSurfaceDSC; - dml32_CalculateODMMode( mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, mode_lib->vba.HActive[k], @@ -2018,10 +1978,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DISPCLKDPPCLKVCOSpeed, /* Output */ - &TotalAvailablePipesSupportNoDSC, - &NumberOfDPPNoDSC, - &ODMModeNoDSC, - &RequiredDISPCLKPerSurfaceNoDSC); + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + 
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC); dml32_CalculateODMMode( mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, @@ -2039,10 +1999,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DISPCLKDPPCLKVCOSpeed, /* Output */ - &TotalAvailablePipesSupportDSC, - &NumberOfDPPDSC, - &ODMModeDSC, - &RequiredDISPCLKPerSurfaceDSC); + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC); dml32_CalculateOutputLink( mode_lib->vba.PHYCLKPerState[i], @@ -2060,8 +2020,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.AudioSampleRate[k], mode_lib->vba.AudioSampleLayout[k], - ODMModeNoDSC, - ODMModeDSC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, mode_lib->vba.DSCEnable[k], mode_lib->vba.OutputLinkDPLanes[k], mode_lib->vba.OutputLinkDPRate[k], @@ -2075,21 +2035,21 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.RequiredSlots[i][k]); if (mode_lib->vba.RequiresDSC[i][k] == false) { - mode_lib->vba.ODMCombineEnablePerState[i][k] = ODMModeNoDSC; + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC; mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = - RequiredDISPCLKPerSurfaceNoDSC; - if (!TotalAvailablePipesSupportNoDSC) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC) 
mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = - mode_lib->vba.TotalNumberOfActiveDPP[i][j] + NumberOfDPPNoDSC; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC; } else { - mode_lib->vba.ODMCombineEnablePerState[i][k] = ODMModeDSC; + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC; mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = - RequiredDISPCLKPerSurfaceDSC; - if (!TotalAvailablePipesSupportDSC) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC) mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = - mode_lib->vba.TotalNumberOfActiveDPP[i][j] + NumberOfDPPDSC; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC; } } @@ -2124,7 +2084,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] = 0; - NoChroma = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = true; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.NoOfDPP[i][j][k] == 1) @@ -2134,7 +2094,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l || mode_lib->vba.SourcePixelFormat[k] == dm_420_10 || mode_lib->vba.SourcePixelFormat[k] == dm_420_12 || mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) { - NoChroma = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = false; } } @@ -2145,15 +2105,15 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting, - 
mode_lib->vba.TotalNumberOfActiveDPP[i][j], NoChroma, + mode_lib->vba.TotalNumberOfActiveDPP[i][j], v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma, mode_lib->vba.Output[0], mode_lib->vba.SurfaceTiling[0], CompBufReservedSpaceNeedAdjustment, mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) { while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP || mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) { - double BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; - unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; + NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.MPCCombineUse[k] @@ -2161,13 +2121,13 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MPCCombineUse[k] != dm_mpc_reduce_voltage && mode_lib->vba.ReadBandwidthLuma[k] + mode_lib->vba.ReadBandwidthChroma[k] > - BWOfNonCombinedSurfaceOfMaximumBandwidth && + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth && (mode_lib->vba.ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_2to1 && mode_lib->vba.ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) && mode_lib->vba.MPCCombine[i][j][k] == false) { - BWOfNonCombinedSurfaceOfMaximumBandwidth = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->vba.ReadBandwidthLuma[k] + mode_lib->vba.ReadBandwidthChroma[k]; NumberOfNonCombinedSurfaceOfMaximumBandwidth = k; @@ -2233,28 +2193,28 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } // i (VOLTAGE_STATE) /* Total Available OTG, HDMIFRL, DP Support Check */ - TotalNumberOfActiveOTG = 0; - TotalNumberOfActiveHDMIFRL = 0; - TotalNumberOfActiveDP2p0 = 0; - 
TotalNumberOfActiveDP2p0Outputs = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = 0; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.BlendingAndTiming[k] == k) { - TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG + 1; if (mode_lib->vba.Output[k] == dm_dp2p0) { - TotalNumberOfActiveDP2p0 = TotalNumberOfActiveDP2p0 + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 + 1; if (mode_lib->vba.OutputMultistreamId[k] == k || mode_lib->vba.OutputMultistreamEn[k] == false) { - TotalNumberOfActiveDP2p0Outputs = TotalNumberOfActiveDP2p0Outputs + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs + 1; } } } } - mode_lib->vba.NumberOfOTGSupport = (TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG); - mode_lib->vba.NumberOfHDMIFRLSupport = (TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs); - mode_lib->vba.NumberOfDP2p0Support = (TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams - && TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs); + mode_lib->vba.NumberOfOTGSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG); + mode_lib->vba.NumberOfHDMIFRLSupport = 
(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs); + mode_lib->vba.NumberOfDP2p0Support = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs); /* Display IO and DSC Support Check */ mode_lib->vba.NonsupportedDSCInputBPC = false; @@ -2269,8 +2229,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (i = 0; i < v->soc.num_states; ++i) { - unsigned int TotalSlots; - mode_lib->vba.ExceededMultistreamSlots[i] = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k) { @@ -2441,12 +2399,12 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /* Check DSC Unit and Slices Support */ - TotalDSCUnitsRequired = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; for (i = 0; i < v->soc.num_states; ++i) { mode_lib->vba.NotEnoughDSCUnits[i] = false; mode_lib->vba.NotEnoughDSCSlices[i] = false; - TotalDSCUnitsRequired = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = true; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.RequiresDSC[i][k] == true) { @@ -2454,33 +2412,31 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.HActive[k] > 4 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 4; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 4; if (mode_lib->vba.NumberOfDSCSlices[k] > 16) mode_lib->vba.NotEnoughDSCSlices[i] = true; } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.HActive[k] > 2 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 2; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 2; if (mode_lib->vba.NumberOfDSCSlices[k] > 8) mode_lib->vba.NotEnoughDSCSlices[i] = true; } else { if (mode_lib->vba.HActive[k] > mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 1; if (mode_lib->vba.NumberOfDSCSlices[k] > 4) mode_lib->vba.NotEnoughDSCSlices[i] = true; } } } - if (TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) + if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) mode_lib->vba.NotEnoughDSCUnits[i] = true; } /*DSC Delay per state*/ for (i = 0; i < v->soc.num_states; ++i) { - unsigned int m; - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { mode_lib->vba.DSCDelayPerState[i][k] = dml32_DSCDelayRequirement( mode_lib->vba.RequiresDSC[i][k], mode_lib->vba.ODMCombineEnablePerState[i][k], @@ -2576,8 +2532,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DETBufferSizeCThisState, &mode_lib->vba.UnboundedRequestEnabledThisState, &mode_lib->vba.CompressedBufferSizeInkByteThisState, - &dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ - 
&dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], &mode_lib->vba.ViewportSizeSupport[i][j]); @@ -2942,7 +2898,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - ReorderingBytes = mode_lib->vba.NumberOfChannels + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes = mode_lib->vba.NumberOfChannels * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); @@ -2998,20 +2954,20 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)); } - FullFrameMALLPStateMethod = false; - SubViewportMALLPStateMethod = false; - PhantomPipeMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) - FullFrameMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = true; if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) - SubViewportMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = true; if 
(mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) - PhantomPipeMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = true; } - mode_lib->vba.InvalidCombinationOfMALLUseForPState = (SubViewportMALLPStateMethod - != PhantomPipeMALLPStateMethod) || (SubViewportMALLPStateMethod && FullFrameMALLPStateMethod); + mode_lib->vba.InvalidCombinationOfMALLUseForPState = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod + != v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod) || (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod); if (mode_lib->vba.UseMinimumRequiredDCFCLK == true) { dml32_UseMinimumDCFCLK( @@ -3025,7 +2981,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.SREnterPlusExitTime, mode_lib->vba.ReturnBusWidth, mode_lib->vba.RoundTripPingLatencyCycles, - ReorderingBytes, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, mode_lib->vba.PixelChunkSizeInKByte, mode_lib->vba.MetaChunkSize, mode_lib->vba.GPUVMEnable, @@ -3088,7 +3044,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l / mode_lib->vba.ReturnBWPerState[i][j] > (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLKState[i][j] - + ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) { + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) { mode_lib->vba.ROBSupport[i][j] = true; } else { mode_lib->vba.ROBSupport[i][j] = false; @@ -3130,9 +3086,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (i = 0; i < (int) v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { - double 
VMDataOnlyReturnBWPerState; - double HostVMInefficiencyFactor; - unsigned int NextPrefetchModeState; mode_lib->vba.TimeCalc = 24 / mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; @@ -3172,37 +3125,35 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentBurstFactorChroma, mode_lib->vba.UrgentBurstFactorCursor); - VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]); - HostVMInefficiencyFactor = 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = 1; if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable) - HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j] - / VMDataOnlyReturnBWPerState; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j] + / v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState; mode_lib->vba.ExtraLatency = dml32_CalculateExtraLatency( - mode_lib->vba.RoundTripPingLatencyCycles, ReorderingBytes, + mode_lib->vba.RoundTripPingLatencyCycles, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.TotalNumberOfActiveDPP[i][j], mode_lib->vba.PixelChunkSizeInKByte, mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize, mode_lib->vba.ReturnBWPerState[i][j], mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.NoOfDPPThisState, mode_lib->vba.dpte_group_bytes, - HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize, 
mode_lib->vba.HostVMMaxNonCachedPageTableLevels); - NextPrefetchModeState = mode_lib->vba.MinPrefetchMode; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = mode_lib->vba.MinPrefetchMode; mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; do { - mode_lib->vba.PrefetchModePerState[i][j] = NextPrefetchModeState; + mode_lib->vba.PrefetchModePerState[i][j] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState; mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { - DmlPipe myPipe; - mode_lib->vba.TWait = dml32_CalculateTWait( mode_lib->vba.PrefetchModePerState[i][j], mode_lib->vba.UsesMALLForPStateChange[k], @@ -3212,34 +3163,34 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.FCLKChangeLatency, mode_lib->vba.UrgLatency[i], mode_lib->vba.SREnterPlusExitTime); - myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k]; - myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j]; - myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; - myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; - myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; - myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; - myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; - myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; - myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; - myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; - myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; - myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; - myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; - myPipe.HTotal = mode_lib->vba.HTotal[k]; - myPipe.HActive = mode_lib->vba.HActive[k]; - myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; - myPipe.ODMMode = 
mode_lib->vba.ODMCombineEnablePerState[i][k]; - myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; - myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; - myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + 
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; mode_lib->vba.NoTimeForPrefetch[i][j][k] = dml32_CalculatePrefetchSchedule( - HostVMInefficiencyFactor, - &myPipe, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, @@ -3298,7 +3249,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup - &dummy_integer[0], // unsigned int *VUpdateOffsetPix + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // unsigned int *VUpdateOffsetPix &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[3], // unsigned int *VUpdateWidthPix 
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[4]); // unsigned int *VReadyOffsetPix } @@ -3427,7 +3378,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { - dml32_CalculateFlipSchedule(HostVMInefficiencyFactor, + dml32_CalculateFlipSchedule(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.ExtraLatency, mode_lib->vba.UrgLatency[i], mode_lib->vba.GPUVMMaxPageTableLevels, @@ -3501,7 +3452,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; - NextPrefetchModeState = NextPrefetchModeState + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState + 1; } else { mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; } @@ -3515,7 +3466,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && !mode_lib->vba.ImmediateFlipRequiredFinal) || mode_lib->vba.ImmediateFlipSupportedForState[i][j] == true)) || (mode_lib->vba.NextMaxVStartup == mode_lib->vba.MaxMaxVStartup[i][j] - && NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode))); + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode))); for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { mode_lib->vba.use_one_row_for_frame_this_state[k] = @@ -3591,7 +3542,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.Watermark, // Store the values in vba &mode_lib->vba.DRAMClockChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // 
double *MaxActiveDRAMClockChangeLatencySupported - &dummy_integer[0], // Long SubViewportLinesNeededInMALL[] + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[] &mode_lib->vba.FCLKChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported &mode_lib->vba.USRRetrainingSupport[i][j], diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 6e61b53823619..492aec634b685 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -196,6 +196,13 @@ struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCal unsigned int ReorderBytes; unsigned int VMDataOnlyReturnBW; double HostVMInefficiencyFactor; + DmlPipe myPipe; + SOCParametersList mmSOCParameters; + double dummy_unit_vector[DC__NUM_DPP__MAX]; + double dummy_single[2]; + enum clock_change_support dummy_dramchange_support; + enum dm_fclock_change_support dummy_fclkchange_support; + bool dummy_USRRetrainingSupport; }; struct dml32_ModeSupportAndSystemConfigurationFull { @@ -211,6 +218,35 @@ struct dml32_ModeSupportAndSystemConfigurationFull { double DSTXAfterScaler[DC__NUM_DPP__MAX]; double MaxTotalVActiveRDBandwidth; bool dummy_boolean_array[2][DC__NUM_DPP__MAX]; + enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; + DmlPipe myPipe; + unsigned int dummy_integer[4]; + unsigned int TotalNumberOfActiveOTG; + unsigned int TotalNumberOfActiveHDMIFRL; + unsigned int TotalNumberOfActiveDP2p0; + unsigned int TotalNumberOfActiveDP2p0Outputs; + unsigned int TotalDSCUnitsRequired; + unsigned int ReorderingBytes; + unsigned int TotalSlots; + unsigned int NumberOfDPPDSC; + unsigned int NumberOfDPPNoDSC; + unsigned int NextPrefetchModeState; + bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; + bool 
MPCCombineMethodAsPossible; + bool FullFrameMALLPStateMethod; + bool SubViewportMALLPStateMethod; + bool PhantomPipeMALLPStateMethod; + bool NoChroma; + bool TotalAvailablePipesSupportNoDSC; + bool TotalAvailablePipesSupportDSC; + enum odm_combine_mode ODMModeNoDSC; + enum odm_combine_mode ODMModeDSC; + double RequiredDISPCLKPerSurfaceNoDSC; + double RequiredDISPCLKPerSurfaceDSC; + double BWOfNonCombinedSurfaceOfMaximumBandwidth; + double VMDataOnlyReturnBWPerState; + double HostVMInefficiencyFactor; + bool dummy_boolean[2]; }; struct dummy_vars { From 43ca1f93df6458978e48bd7b628a15d1d1cff0b2 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 22 Jul 2022 13:56:17 -0400 Subject: [PATCH 04/79] drm/amd/display: Reduce stack size in the mode support function When we use the allmodconfig option we see the following error: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In function 'dml32_ModeSupportAndSystemConfigurationFull': drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:3799:1: error: the frame size of 2464 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] 3799 | } // ModeSupportAndSystemConfigurationFull This commit fixes this issue by moving part of the mode support operation from ModeSupportAndSystemConfigurationFull to a dedicated function. 
Cc: Harry Wentland Cc: Alex Deucher Cc: Aurabindo Pillai Cc: Stephen Rothwell Tested-by: Stephen Rothwell Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml/dcn32/display_mode_vba_32.c | 132 ++++++++++-------- 1 file changed, 70 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 9c2003fbe8fa4..cb2025771646b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1649,6 +1649,75 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #endif } +static void mode_support_configuration(struct vba_vars_st *v, + struct display_mode_lib *mode_lib) +{ + int i, j; + + for (i = v->soc.num_states - 1; i >= 0; i--) { + for (j = 0; j < 2; j++) { + if (mode_lib->vba.ScaleRatioAndTapsSupport == true + && mode_lib->vba.SourceFormatPixelAndScanSupport == true + && mode_lib->vba.ViewportSizeSupport[i][j] == true + && !mode_lib->vba.LinkRateDoesNotMatchDPVersion + && !mode_lib->vba.LinkRateForMultistreamNotIndicated + && !mode_lib->vba.BPPForMultistreamNotIndicated + && !mode_lib->vba.MultistreamWithHDMIOreDP + && !mode_lib->vba.ExceededMultistreamSlots[i] + && !mode_lib->vba.MSOOrODMSplitWithNonDPLink + && !mode_lib->vba.NotEnoughLanesForMSO + && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 + && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP + && !mode_lib->vba.DSC422NativeNotSupported + && !mode_lib->vba.MPCCombineMethodIncompatible + && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true + && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true + && mode_lib->vba.NotEnoughDSCUnits[i] == false + && !mode_lib->vba.NotEnoughDSCSlices[i] + && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe + && 
!mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen + && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false + && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] + && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false + && !mode_lib->vba.InvalidCombinationOfMALLUseForPState + && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified + && mode_lib->vba.ROBSupport[i][j] == true + && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true + && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true + && mode_lib->vba.NumberOfOTGSupport == true + && mode_lib->vba.NumberOfHDMIFRLSupport == true + && mode_lib->vba.EnoughWritebackUnits == true + && mode_lib->vba.WritebackLatencySupport == true + && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true + && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true + && mode_lib->vba.ViewportExceedsSurface == false + && mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.VActiveBandwithSupport[i][j] == true + && mode_lib->vba.DynamicMetadataSupported[i][j] == true + && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true + && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true + && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true + && mode_lib->vba.NonsupportedDSCInputBPC == false + && !mode_lib->vba.ExceededMALLSize + && ((mode_lib->vba.HostVMEnable == false + && !mode_lib->vba.ImmediateFlipRequiredFinal) + || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) + && (!mode_lib->vba.DRAMClockChangeRequirementFinal + || i == v->soc.num_states - 1 + || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported) + && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1 + || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported) + && (!mode_lib->vba.USRRetrainingRequiredFinal + || 
mode_lib->vba.USRRetrainingSupport[i][j])) { + mode_lib->vba.ModeSupport[i][j] = true; + } else { + mode_lib->vba.ModeSupport[i][j] = false; + } + } + } +} + void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { struct vba_vars_st *v = &mode_lib->vba; @@ -3622,68 +3691,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /*Mode Support, Voltage State and SOC Configuration*/ - for (i = v->soc.num_states - 1; i >= 0; i--) { - for (j = 0; j < 2; j++) { - if (mode_lib->vba.ScaleRatioAndTapsSupport == true - && mode_lib->vba.SourceFormatPixelAndScanSupport == true - && mode_lib->vba.ViewportSizeSupport[i][j] == true - && !mode_lib->vba.LinkRateDoesNotMatchDPVersion - && !mode_lib->vba.LinkRateForMultistreamNotIndicated - && !mode_lib->vba.BPPForMultistreamNotIndicated - && !mode_lib->vba.MultistreamWithHDMIOreDP - && !mode_lib->vba.ExceededMultistreamSlots[i] - && !mode_lib->vba.MSOOrODMSplitWithNonDPLink - && !mode_lib->vba.NotEnoughLanesForMSO - && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 - && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP - && !mode_lib->vba.DSC422NativeNotSupported - && !mode_lib->vba.MPCCombineMethodIncompatible - && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true - && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true - && mode_lib->vba.NotEnoughDSCUnits[i] == false - && !mode_lib->vba.NotEnoughDSCSlices[i] - && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe - && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen - && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false - && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] - && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false - && !mode_lib->vba.InvalidCombinationOfMALLUseForPState - && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified - && mode_lib->vba.ROBSupport[i][j] == true - && 
mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true - && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true - && mode_lib->vba.NumberOfOTGSupport == true - && mode_lib->vba.NumberOfHDMIFRLSupport == true - && mode_lib->vba.EnoughWritebackUnits == true - && mode_lib->vba.WritebackLatencySupport == true - && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true - && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true - && mode_lib->vba.ViewportExceedsSurface == false - && mode_lib->vba.PrefetchSupported[i][j] == true - && mode_lib->vba.VActiveBandwithSupport[i][j] == true - && mode_lib->vba.DynamicMetadataSupported[i][j] == true - && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true - && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true - && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true - && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true - && mode_lib->vba.NonsupportedDSCInputBPC == false - && !mode_lib->vba.ExceededMALLSize - && ((mode_lib->vba.HostVMEnable == false - && !mode_lib->vba.ImmediateFlipRequiredFinal) - || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) - && (!mode_lib->vba.DRAMClockChangeRequirementFinal - || i == v->soc.num_states - 1 - || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported) - && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1 - || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported) - && (!mode_lib->vba.USRRetrainingRequiredFinal - || mode_lib->vba.USRRetrainingSupport[i][j])) { - mode_lib->vba.ModeSupport[i][j] = true; - } else { - mode_lib->vba.ModeSupport[i][j] = false; - } - } - } + mode_support_configuration(v, mode_lib); MaximumMPCCombine = 0; From e6ca678ddda2e08f02a75d60f17ebe7e6961c07a Mon Sep 17 00:00:00 2001 From: Magali Lemes Date: Tue, 2 Aug 2022 09:04:06 -0300 Subject: [PATCH 05/79] drm/amd/display: remove DML Makefile duplicate lines MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit There are two identical CFLAGS entries for "display_mode_vba_20.o", so remove one of them. Also, as there's already an entry for "display_mode_lib.o" CFLAGS, regardless of CONFIG_DRM_AMD_DC_DCN being defined or not, remove the one entry between CONFIG_DRM_AMD_DC_DCN ifdef guards. Signed-off-by: Magali Lemes Reviewed-by: André Almeida Reviewed-by: Harry Wentland --- drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index cc73c9763e666..7f01462987d18 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -59,7 +59,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) @@ -80,7 +79,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare From f7a80edcebccc382e63bff13fc2720d7258fb6c7 Mon Sep 17 00:00:00 2001 From: Magali Lemes 
Date: Fri, 29 Jul 2022 22:06:58 -0300 Subject: [PATCH 06/79] drm/amd/display: make variables static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As "dcn3_1_soc", "dcn3_15_soc", and "dcn3_16_soc" are not used outside of their corresponding "dcn3*_fpu.c", make them static and remove their extern declaration. Fixes: 26f4712aedbd ("drm/amd/display: move FPU related code from dcn31 to dml/dcn31 folder") Fixes: fa896297b31b ("drm/amd/display: move FPU related code from dcn315 to dml/dcn31 folder") Fixes: 3f8951cc123f ("drm/amd/display: move FPU related code from dcn316 to dml/dcn31 folder") Signed-off-by: Magali Lemes Reviewed-by: Rodrigo Siqueira Reviewed-by: Maíra Canal Reviewed-by: Melissa Wen --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h | 1 - drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h | 1 - drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h | 1 - drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 6 +++--- 4 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 41f8ec99da6b3..901436591ed45 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -32,7 +32,6 @@ container_of(pool, struct dcn31_resource_pool, base) extern struct _vcs_dpi_ip_params_st dcn3_1_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc; struct dcn31_resource_pool { struct resource_pool base; diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h index 39929fa67a510..22849eaa6f243 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h @@ -32,7 +32,6 @@ container_of(pool, struct dcn315_resource_pool, base) extern struct _vcs_dpi_ip_params_st dcn3_15_ip; -extern struct 
_vcs_dpi_ip_params_st dcn3_15_soc; struct dcn315_resource_pool { struct resource_pool base; diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h index 0dc5a6c13ae7d..aba6d634131b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h @@ -32,7 +32,6 @@ container_of(pool, struct dcn316_resource_pool, base) extern struct _vcs_dpi_ip_params_st dcn3_16_ip; -extern struct _vcs_dpi_ip_params_st dcn3_16_soc; struct dcn316_resource_pool { struct resource_pool base; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index e36cfa5985ea9..5664653ba5ac1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -114,7 +114,7 @@ struct _vcs_dpi_ip_params_st dcn3_1_ip = { .dcc_supported = true, }; -struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { +static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { /*TODO: correct dispclk/dppclk voltage level determination*/ .clock_limits = { { @@ -259,7 +259,7 @@ struct _vcs_dpi_ip_params_st dcn3_15_ip = { .dcc_supported = true, }; -struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { +static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { .sr_exit_time_us = 9.0, .sr_enter_plus_exit_time_us = 11.0, .sr_exit_z8_time_us = 50.0, @@ -355,7 +355,7 @@ struct _vcs_dpi_ip_params_st dcn3_16_ip = { .dcc_supported = true, }; -struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { +static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { /*TODO: correct dispclk/dppclk voltage level determination*/ .clock_limits = { { From aa0c493fadd5b88a5998f78aee3e6f571a765aa7 Mon Sep 17 00:00:00 2001 From: Magali Lemes Date: Fri, 29 Jul 2022 22:07:00 -0300 Subject: [PATCH 07/79] drm/amd/display: remove header from source file MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Since "rn_clk_mgr.h" needs ‘struct clk_mgr_internal’ which is declared in "clk_mgr_internal.h", include "clk_mgr_internal.h" in "rn_clk_mgr.h" instead of in its source file. Because of the change above, change the order of '#include "rn_clk_mgr.h"', so that the necessary structs are visible to dcn20_clk_mgr.h. Signed-off-by: Magali Lemes Reviewed-by: Rodrigo Siqueira Reviewed-by: Maíra Canal Reviewed-by: Melissa Wen --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 3 +-- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 0202dc682682b..ca6dfd2d7561f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -24,10 +24,9 @@ */ #include "dccg.h" -#include "clk_mgr_internal.h" +#include "rn_clk_mgr.h" #include "dcn20/dcn20_clk_mgr.h" -#include "rn_clk_mgr.h" #include "dml/dcn20/dcn20_fpu.h" #include "dce100/dce_clk_mgr.h" diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h index 2e088c5171b28..f1319957e400a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h @@ -28,6 +28,7 @@ #include "clk_mgr.h" #include "dm_pp_smu.h" +#include "clk_mgr_internal.h" extern struct wm_table ddr4_wm_table_gs; extern struct wm_table lpddr4_wm_table_gs; From 04d9f8ccd34dfe5bf9f49f4a01c51735d26d0bf5 Mon Sep 17 00:00:00 2001 From: Magali Lemes Date: Fri, 29 Jul 2022 22:07:02 -0300 Subject: [PATCH 08/79] drm/amd/display: include missing headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing headers to solve the following warnings from sparse: 
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/dcn20_fpu.c:656:17: warning: symbol 'ddr4_wm_table_gs' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/dcn20_fpu.c:693:17: warning: symbol 'lpddr4_wm_table_gs' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/dcn20_fpu.c:730:17: warning: symbol 'lpddr4_wm_table_with_disabled_ppt' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/dcn20_fpu.c:767:17: warning: symbol 'ddr4_wm_table_rn' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/dcn20_fpu.c:804:17: warning: symbol 'ddr4_1R_wm_table_rn' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn20/dcn20_fpu.c:841:17: warning: symbol 'lpddr4_wm_table_rn' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn301/dcn301_fpu.c:217:17: warning: symbol 'ddr4_wm_table' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn301/dcn301_fpu.c:254:17: warning: symbol 'lpddr5_wm_table' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/dcn31_fpu.c:53:30: warning: symbol 'dcn3_1_ip' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/dcn31_fpu.c:197:30: warning: symbol 'dcn3_15_ip' was not declared. Should it be static? drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/dcn31_fpu.c:293:30: warning: symbol 'dcn3_16_ip' was not declared. Should it be static? 
Fixes: 490d2bc889f1 ("drm/amd/display: move FPU code on dcn21 clk_mgr") Fixes: 83916f9a32a4 ("drm/amd/display: move FPU code from dcn301 clk mgr to DML folder") Fixes: 26f4712aedbd ("drm/amd/display: move FPU related code from dcn31 to dml/dcn31 folder") Fixes: fa896297b31b ("drm/amd/display: move FPU related code from dcn315 to dml/dcn31 folder") Fixes: 3f8951cc123f ("drm/amd/display: move FPU related code from dcn316 to dml/dcn31 folder") Signed-off-by: Magali Lemes Reviewed-by: Rodrigo Siqueira Reviewed-by: Maíra Canal Reviewed-by: Melissa Wen --- drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 1 + drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c | 1 + drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index ca44df4fca747..d34e0f1314d91 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -30,6 +30,7 @@ #include "dchubbub.h" #include "dcn20/dcn20_resource.h" #include "dcn21/dcn21_resource.h" +#include "clk_mgr/dcn21/rn_clk_mgr.h" #include "dcn20_fpu.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index 7ef66e511ec8e..d211cf6d234c7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -26,6 +26,7 @@ #include "clk_mgr.h" #include "dcn20/dcn20_resource.h" #include "dcn301/dcn301_resource.h" +#include "clk_mgr/dcn301/vg_clk_mgr.h" #include "dml/dcn20/dcn20_fpu.h" #include "dcn301_fpu.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 5664653ba5ac1..149a1b17cdf3f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c 
@@ -25,6 +25,9 @@ #include "resource.h" #include "clk_mgr.h" +#include "dcn31/dcn31_resource.h" +#include "dcn315/dcn315_resource.h" +#include "dcn316/dcn316_resource.h" #include "dml/dcn20/dcn20_fpu.h" #include "dcn31_fpu.h" From 75495d884b80a1cac67a364721cb3edd93497ed5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 4 Aug 2022 15:50:36 +1000 Subject: [PATCH 09/79] drm/amd/display: restore plane with no modifiers code. When this file was split in 5d945cbcd4b16a29d6470a80dfb19738f9a4319f Author: Rodrigo Siqueira Date: Wed Jul 20 15:31:42 2022 -0400 drm/amd/display: Create a file dedicated to planes This chunk seemed to get dropped. Linus noticed on this rx580 and I've reproduced on FIJI which makes sense as these are pre-modifier GPUs. With this applied, I get gdm back. Fixes: 5d945cbcd4b1 ("drm/amd/display: Create a file dedicated to planes") Signed-off-by: Dave Airlie Reviewed-by: Harry Wentland --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 440ea0d19f899..d534b3d8328db 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1824,6 +1824,9 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, return res; #endif + if (modifiers == NULL) + adev_to_drm(dm->adev)->mode_config.fb_modifiers_not_supported = true; + res = drm_universal_plane_init(adev_to_drm(dm->adev), plane, possible_crtcs, &dm_plane_funcs, formats, num_formats, modifiers, plane->type, NULL); From f66ccbe6223ff671cb19f8c1fb86dc1cf558f36e Mon Sep 17 00:00:00 2001 From: Asher Song Date: Mon, 8 Aug 2022 14:37:27 +0800 Subject: [PATCH 10/79] drm/amdkcl: wrap the code under macro HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED It's caused by 33c9a3a8fff86ea024f983ce5bceeee4130459d9 "drm/amd/display: restore plane with no modifiers 
code." v5.18-2526-gca3175810764 Signed-off-by: Asher Song Reviewed-by: Leslie Shi --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index d534b3d8328db..08b1d10e021c9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1824,8 +1824,10 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, return res; #endif +#ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED if (modifiers == NULL) adev_to_drm(dm->adev)->mode_config.fb_modifiers_not_supported = true; +#endif res = drm_universal_plane_init(adev_to_drm(dm->adev), plane, possible_crtcs, &dm_plane_funcs, formats, num_formats, From 71bdde23e916addfe233039ce8db8ab07d29b102 Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Thu, 28 Jul 2022 10:44:47 +0800 Subject: [PATCH 11/79] drm/amdgpu: add mode2 reset for sienna_cichlid To meet the requirement of the multi-container use case, which needs a quicker reset that does not cause VRAM loss, add the Mode2 reset handler for sienna_cichlid.
v2: move skip mode2 flag part separately v3: remove the use of asic_reset_res Signed-off-by: Victor Zhao Acked-by: Andrey Grodzovsky --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 7 + drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 296 ++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h | 32 ++ .../pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h | 4 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 3 +- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 54 ++++ 7 files changed, 394 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c create mode 100644 drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 5f2470c34334a..782bc7d05f3c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -79,7 +79,7 @@ amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \ - nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o + sienna_cichlid.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o # add DF block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 32c86a0b145ca..f778466bb9dbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -23,6 +23,7 @@ #include "amdgpu_reset.h" #include "aldebaran.h" +#include "sienna_cichlid.h" int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_handler *handler) @@ -40,6 +41,9 @@ int amdgpu_reset_init(struct amdgpu_device *adev) case IP_VERSION(13, 0, 2): ret = aldebaran_reset_init(adev); break; + case IP_VERSION(11, 0, 7): + ret = sienna_cichlid_reset_init(adev); + break; 
default: break; } @@ -55,6 +59,9 @@ int amdgpu_reset_fini(struct amdgpu_device *adev) case IP_VERSION(13, 0, 2): ret = aldebaran_reset_fini(adev); break; + case IP_VERSION(11, 0, 7): + ret = sienna_cichlid_reset_fini(adev); + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c new file mode 100644 index 0000000000000..b61a8ddec7ef0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -0,0 +1,296 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "sienna_cichlid.h" +#include "amdgpu_reset.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_dpm.h" +#include "amdgpu_job.h" +#include "amdgpu_ring.h" +#include "amdgpu_ras.h" +#include "amdgpu_psp.h" +#include "amdgpu_xgmi.h" + +static struct amdgpu_reset_handler * +sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_reset_handler *handler; + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + + if (reset_context->method != AMD_RESET_METHOD_NONE) { + list_for_each_entry(handler, &reset_ctl->reset_handlers, + handler_list) { + if (handler->reset_method == reset_context->method) + return handler; + } + } else { + list_for_each_entry(handler, &reset_ctl->reset_handlers, + handler_list) { + if (handler->reset_method == AMD_RESET_METHOD_MODE2 && + adev->pm.fw_version >= 0x3a5500 && + !amdgpu_sriov_vf(adev)) { + reset_context->method = AMD_RESET_METHOD_MODE2; + return handler; + } + } + } + + return NULL; +} + +static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev) +{ + int r, i; + + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { + if (!(adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_GFX || + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_SDMA)) + continue; + + r = adev->ip_blocks[i].version->funcs->suspend(adev); + + if (r) { + dev_err(adev->dev, + "suspend of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + adev->ip_blocks[i].status.hw = false; + } + + return r; +} + +static int +sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + int r = 0; + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + + if (!amdgpu_sriov_vf(adev)) + r = 
sienna_cichlid_mode2_suspend_ip(adev); + + return r; +} + +static void sienna_cichlid_async_reset(struct work_struct *work) +{ + struct amdgpu_reset_handler *handler; + struct amdgpu_reset_control *reset_ctl = + container_of(work, struct amdgpu_reset_control, reset_work); + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + + list_for_each_entry(handler, &reset_ctl->reset_handlers, + handler_list) { + if (handler->reset_method == reset_ctl->active_reset) { + dev_dbg(adev->dev, "Resetting device\n"); + handler->do_reset(adev); + break; + } + } +} + +static int sienna_cichlid_mode2_reset(struct amdgpu_device *adev) +{ + /* disable BM */ + pci_clear_master(adev->pdev); + return amdgpu_dpm_mode2_reset(adev); +} + +static int +sienna_cichlid_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + int r; + + r = sienna_cichlid_mode2_reset(adev); + if (r) { + dev_err(adev->dev, + "ASIC reset failed with error, %d ", r); + } + return r; +} + +static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev) +{ + int i, r; + struct psp_context *psp = &adev->psp; + + r = psp_rlc_autoload_start(psp); + if (r) { + dev_err(adev->dev, "Failed to start rlc autoload\n"); + return r; + } + + /* Reinit GFXHUB */ + adev->gfxhub.funcs->init(adev); + r = adev->gfxhub.funcs->gart_enable(adev); + if (r) { + dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n"); + return r; + } + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + dev_err(adev->dev, + "resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + + adev->ip_blocks[i].status.hw = true; + } + } + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!(adev->ip_blocks[i].version->type == + 
AMD_IP_BLOCK_TYPE_GFX || + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_SDMA)) + continue; + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + dev_err(adev->dev, + "resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + + adev->ip_blocks[i].status.hw = true; + } + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!(adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_GFX || + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_SDMA)) + continue; + + if (adev->ip_blocks[i].version->funcs->late_init) { + r = adev->ip_blocks[i].version->funcs->late_init( + (void *)adev); + if (r) { + dev_err(adev->dev, + "late_init of IP block <%s> failed %d after reset\n", + adev->ip_blocks[i].version->funcs->name, + r); + return r; + } + } + adev->ip_blocks[i].status.late_initialized = true; + } + + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); + + return r; +} + +static int +sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + int r; + struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle; + + dev_info(tmp_adev->dev, + "GPU reset succeeded, trying to resume\n"); + r = sienna_cichlid_mode2_restore_ip(tmp_adev); + if (r) + goto end; + + /* + * Add this ASIC as tracked as reset was already + * complete successfully. 
+ */ + amdgpu_register_gpu_instance(tmp_adev); + + /* Resume RAS */ + amdgpu_ras_resume(tmp_adev); + + amdgpu_irq_gpu_reset_resume_helper(tmp_adev); + + r = amdgpu_ib_ring_tests(tmp_adev); + if (r) { + dev_err(tmp_adev->dev, + "ib ring test failed (%d).\n", r); + r = -EAGAIN; + goto end; + } + +end: + if (r) + return -EAGAIN; + else + return r; +} + +static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = { + .reset_method = AMD_RESET_METHOD_MODE2, + .prepare_env = NULL, + .prepare_hwcontext = sienna_cichlid_mode2_prepare_hwcontext, + .perform_reset = sienna_cichlid_mode2_perform_reset, + .restore_hwcontext = sienna_cichlid_mode2_restore_hwcontext, + .restore_env = NULL, + .do_reset = sienna_cichlid_mode2_reset, +}; + +int sienna_cichlid_reset_init(struct amdgpu_device *adev) +{ + struct amdgpu_reset_control *reset_ctl; + + reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL); + if (!reset_ctl) + return -ENOMEM; + + reset_ctl->handle = adev; + reset_ctl->async_reset = sienna_cichlid_async_reset; + reset_ctl->active_reset = AMD_RESET_METHOD_NONE; + reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler; + + INIT_LIST_HEAD(&reset_ctl->reset_handlers); + INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset); + /* Only mode2 is handled through reset control now */ + amdgpu_reset_add_handler(reset_ctl, &sienna_cichlid_mode2_handler); + + adev->reset_cntl = reset_ctl; + + return 0; +} + +int sienna_cichlid_reset_fini(struct amdgpu_device *adev) +{ + kfree(adev->reset_cntl); + adev->reset_cntl = NULL; + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h new file mode 100644 index 0000000000000..5213b162dacd3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h @@ -0,0 +1,32 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __SIENNA_CICHLID_H__ +#define __SIENNA_CICHLID_H__ + +#include "amdgpu.h" + +int sienna_cichlid_reset_init(struct amdgpu_device *adev); +int sienna_cichlid_reset_fini(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h index d2e10a724560f..82cf9e563065f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h @@ -137,7 +137,7 @@ #define PPSMC_MSG_DisallowGpo 0x56 #define PPSMC_MSG_Enable2ndUSB20Port 0x57 - -#define PPSMC_Message_Count 0x58 +#define PPSMC_MSG_DriverMode2Reset 0x5D +#define PPSMC_Message_Count 0x5E #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 19084a4fcb2bd..28f6a1eb6945c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -235,7 +235,8 @@ __SMU_DUMMY_MAP(UnforceGfxVid), \ __SMU_DUMMY_MAP(HeavySBR), \ __SMU_DUMMY_MAP(SetBadHBMPagesRetiredFlagsPerChannel), \ - __SMU_DUMMY_MAP(EnableGfxImu), + __SMU_DUMMY_MAP(EnableGfxImu), \ + __SMU_DUMMY_MAP(DriverMode2Reset), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 84e61a9967acf..c58c504645665 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -154,6 +154,7 @@ static struct cmn2asic_msg_mapping sienna_cichlid_message_map[SMU_MSG_MAX_COUNT] MSG_MAP(SetGpoFeaturePMask, PPSMC_MSG_SetGpoFeaturePMask, 0), MSG_MAP(DisallowGpo, PPSMC_MSG_DisallowGpo, 0), MSG_MAP(Enable2ndUSB20Port, PPSMC_MSG_Enable2ndUSB20Port, 0), + MSG_MAP(DriverMode2Reset, PPSMC_MSG_DriverMode2Reset, 0), }; static struct cmn2asic_mapping 
sienna_cichlid_clk_map[SMU_CLK_COUNT] = { @@ -4254,6 +4255,57 @@ static int sienna_cichlid_stb_get_data_direct(struct smu_context *smu, return 0; } +static bool sienna_cichlid_is_mode2_reset_supported(struct smu_context *smu) +{ + return true; +} + +static int sienna_cichlid_mode2_reset(struct smu_context *smu) +{ + u32 smu_version; + int ret = 0, index; + struct amdgpu_device *adev = smu->adev; + int timeout = 100; + + smu_cmn_get_smc_version(smu, NULL, &smu_version); + + index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG, + SMU_MSG_DriverMode2Reset); + + mutex_lock(&smu->message_lock); + + ret = smu_cmn_send_msg_without_waiting(smu, (uint16_t)index, + SMU_RESET_MODE_2); + + ret = smu_cmn_wait_for_response(smu); + while (ret != 0 && timeout) { + ret = smu_cmn_wait_for_response(smu); + /* Wait a bit more time for getting ACK */ + if (ret != 0) { + --timeout; + usleep_range(500, 1000); + continue; + } else { + break; + } + } + + if (!timeout) { + dev_err(adev->dev, + "failed to send mode2 message \tparam: 0x%08x response %#x\n", + SMU_RESET_MODE_2, ret); + goto out; + } + + dev_info(smu->adev->dev, "restore config space...\n"); + /* Restore the config space saved during init */ + amdgpu_device_load_pci_state(adev->pdev); +out: + mutex_unlock(&smu->message_lock); + + return ret; +} + static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .get_allowed_feature_mask = sienna_cichlid_get_allowed_feature_mask, .set_default_dpm_table = sienna_cichlid_set_default_dpm_table, @@ -4348,6 +4400,8 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .get_default_config_table_settings = sienna_cichlid_get_default_config_table_settings, .set_config_table = sienna_cichlid_set_config_table, .get_unique_id = sienna_cichlid_get_unique_id, + .mode2_reset_is_support = sienna_cichlid_is_mode2_reset_supported, + .mode2_reset = sienna_cichlid_mode2_reset, }; void sienna_cichlid_set_ppt_funcs(struct smu_context *smu) From 
45972904c128817bef5946ad62d03ba3d3cea546 Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Thu, 28 Jul 2022 10:39:23 +0800 Subject: [PATCH 12/79] drm/amdgpu: let mode2 reset fallback to default when failure - introduce AMDGPU_SKIP_MODE2_RESET flag - let mode2 reset fallback to default reset method if failed v2: move this part out from the asic specific part Signed-off-by: Victor Zhao Acked-by: Andrey Grodzovsky --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 ++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 1 + drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 1 + drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 1 + drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 1 + 9 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 28f27df5917e3..7a93858744ac7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -136,6 +136,7 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 75c43bf9dc4e3..fdb9e10623e00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5206,6 +5206,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, reset_context->job = job; reset_context->hive = hive; + /* * Build list of devices to reset. 
* In case we are in XGMI hive mode, resort the device list @@ -5329,8 +5330,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_ras_resume(adev); } else { r = amdgpu_do_asic_reset(device_list_handle, reset_context); - if (r && r == -EAGAIN) + if (r && r == -EAGAIN) { + set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags); + adev->asic_reset_res = 0; goto retry; + } } skip_hw_reset: @@ -5760,6 +5764,7 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(adev, &reset_context); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 2b155ad637d28..fca04289a2d1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -71,6 +71,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4e4460411ec50..c2d3d3e456f1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1953,6 +1953,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index f778466bb9dbd..831fb222139c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -74,6 +74,9 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev, { struct amdgpu_reset_handler *reset_handler = NULL; + if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags)) + return -ENOSYS; + if (adev->reset_cntl && adev->reset_cntl->get_reset_handler) reset_handler = adev->reset_cntl->get_reset_handler( adev->reset_cntl, reset_context); @@ -90,6 +93,9 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev, int ret; struct amdgpu_reset_handler *reset_handler = NULL; + if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags)) + return -ENOSYS; + if (adev->reset_cntl) reset_handler = adev->reset_cntl->get_reset_handler( adev->reset_cntl, reset_context); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index ffda1560c6481..f71b83c425908 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -30,6 +30,7 @@ enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, AMDGPU_SKIP_HW_RESET = 1, + AMDGPU_SKIP_MODE2_RESET = 2, }; struct amdgpu_reset_context { diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 34b60e19b247a..4f66ee86086d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -291,6 +291,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 03296aa12f2a1..329f2679ac5f1 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -318,6 +318,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 288c414babdfa..fd14fa9b9cd7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -529,6 +529,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } From f8e696a8c5fd1c24e273cc4cb1f1b44f631f55de Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Tue, 14 Jun 2022 10:48:11 +0800 Subject: [PATCH 13/79] drm/amdgpu: add debugfs amdgpu_reset_level Introduce the amdgpu_reset_level debugfs entry to help debug and test specific types of reset. It also helps block unwanted types of reset.
By default, mode2 reset will not be enabled v2: make this debugfs in adev and use debugfs_create_u32 Signed-off-by: Victor Zhao Acked-by: Andrey Grodzovsky --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 3 +++ 4 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2c4bdc0530cc8..6a94eb5acab81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -275,6 +275,9 @@ extern int amdgpu_vcnfw_log; #define AMDGPU_RESET_VCE (1 << 13) #define AMDGPU_RESET_VCE1 (1 << 14) +#define AMDGPU_RESET_LEVEL_SOFT_RECOVERY (1 << 0) +#define AMDGPU_RESET_LEVEL_MODE2 (1 << 1) + /* max cursor sizes (in pixels) */ #define CIK_CURSOR_WIDTH 128 #define CIK_CURSOR_HEIGHT 128 @@ -1095,6 +1098,8 @@ struct amdgpu_device { uint32_t scpm_status; struct work_struct reset_work; + + uint32_t amdgpu_reset_level_mask; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 15e1893e0b74d..06d8d7a13296d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1839,6 +1839,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) } #endif + debugfs_create_u32("amdgpu_reset_level", 0600, root, &adev->amdgpu_reset_level_mask); + /* Register debugfs entries for amdgpu_ttm */ amdgpu_ttm_debugfs_init(adev); amdgpu_debugfs_pm_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 831fb222139c6..9da5ead50c900 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -37,6 +37,8 @@ int amdgpu_reset_init(struct amdgpu_device *adev) { int ret = 0; + 
adev->amdgpu_reset_level_mask = 0x1; + switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 2): ret = aldebaran_reset_init(adev); @@ -74,6 +76,9 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev, { struct amdgpu_reset_handler *reset_handler = NULL; + if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2)) + return -ENOSYS; + if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags)) return -ENOSYS; @@ -93,6 +98,9 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev, int ret; struct amdgpu_reset_handler *reset_handler = NULL; + if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2)) + return -ENOSYS; + if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags)) return -ENOSYS; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d3558c34d406c..3e316b013fd95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -405,6 +405,9 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { ktime_t deadline = ktime_add_us(ktime_get(), 10000); + if (!(ring->adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_SOFT_RECOVERY)) + return false; + if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) return false; From cf4015cb4da833f6b8f799494c326ca29f72bb1e Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Tue, 21 Jun 2022 16:59:44 +0800 Subject: [PATCH 14/79] drm/amdgpu: save and restore gc hub regs Save and restore gfxhub regs as they will be reset during mode 2 Signed-off-by: Victor Zhao Acked-by: Andrey Grodzovsky --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 26 +++++++ drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 72 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 7 +- .../include/asic_reg/gc/gc_10_3_0_offset.h | 4 ++ 5 files changed, 110 insertions(+), 1 deletion(-) diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index beabab515836a..f8036f2b100e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -35,6 +35,8 @@ struct amdgpu_gfxhub_funcs { void (*init)(struct amdgpu_device *adev); int (*get_xgmi_info)(struct amdgpu_device *adev); void (*utcl2_harvest)(struct amdgpu_device *adev); + void (*mode2_save_regs)(struct amdgpu_device *adev); + void (*mode2_restore_regs)(struct amdgpu_device *adev); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 008eaca271519..0305b660cd17d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -264,6 +264,32 @@ struct amdgpu_gmc { u64 mall_size; /* number of UMC instances */ int num_umc; + /* mode2 save restore */ + u64 VM_L2_CNTL; + u64 VM_L2_CNTL2; + u64 VM_DUMMY_PAGE_FAULT_CNTL; + u64 VM_DUMMY_PAGE_FAULT_ADDR_LO32; + u64 VM_DUMMY_PAGE_FAULT_ADDR_HI32; + u64 VM_L2_PROTECTION_FAULT_CNTL; + u64 VM_L2_PROTECTION_FAULT_CNTL2; + u64 VM_L2_PROTECTION_FAULT_MM_CNTL3; + u64 VM_L2_PROTECTION_FAULT_MM_CNTL4; + u64 VM_L2_PROTECTION_FAULT_ADDR_LO32; + u64 VM_L2_PROTECTION_FAULT_ADDR_HI32; + u64 VM_DEBUG; + u64 VM_L2_MM_GROUP_RT_CLASSES; + u64 VM_L2_BANK_SELECT_RESERVED_CID; + u64 VM_L2_BANK_SELECT_RESERVED_CID2; + u64 VM_L2_CACHE_PARITY_CNTL; + u64 VM_L2_IH_LOG_CNTL; + u64 VM_CONTEXT_CNTL[16]; + u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[16]; + u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[16]; + u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[16]; + u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[16]; + u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[16]; + u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[16]; + u64 MC_VM_MX_L1_TLB_CNTL; }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c 
b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index d8c5315811169..51cf8acd2d796 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -576,6 +576,76 @@ static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev) } } +static void gfxhub_v2_1_save_regs(struct amdgpu_device *adev) +{ + int i; + adev->gmc.VM_L2_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL); + adev->gmc.VM_L2_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2); + adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL); + adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32); + adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32); + adev->gmc.VM_L2_PROTECTION_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2); + adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3); + adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4); + adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32); + adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32); + adev->gmc.VM_DEBUG = RREG32_SOC15(GC, 0, mmGCVM_DEBUG); + adev->gmc.VM_L2_MM_GROUP_RT_CLASSES = RREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES); + adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID); + adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2 = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2); + adev->gmc.VM_L2_CACHE_PARITY_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL); + adev->gmc.VM_L2_IH_LOG_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL); + + for (i = 0; i <= 15; i++) { + adev->gmc.VM_CONTEXT_CNTL[i] = RREG32_SOC15_OFFSET(GC, 0, 
mmGCVM_CONTEXT0_CNTL, i); + adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2); + } + + adev->gmc.MC_VM_MX_L1_TLB_CNTL = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); +} + +static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev) +{ + int i; + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, adev->gmc.VM_L2_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, adev->gmc.VM_L2_CNTL2); + WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL, adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32); + WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32, adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32, 
adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32); + WREG32_SOC15(GC, 0, mmGCVM_DEBUG, adev->gmc.VM_DEBUG); + WREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES, adev->gmc.VM_L2_MM_GROUP_RT_CLASSES); + WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID); + WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2); + WREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL, adev->gmc.VM_L2_CACHE_PARITY_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL, adev->gmc.VM_L2_IH_LOG_CNTL); + + for (i = 0; i <= 15; i++) { + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, adev->gmc.VM_CONTEXT_CNTL[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i]); + } + + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE, adev->gmc.vram_start >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP, adev->gmc.vram_end >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, adev->gmc.MC_VM_MX_L1_TLB_CNTL); +} + const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .get_fb_location = gfxhub_v2_1_get_fb_location, .get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset, @@ -586,4 +656,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .init = gfxhub_v2_1_init, .get_xgmi_info = 
gfxhub_v2_1_get_xgmi_info, .utcl2_harvest = gfxhub_v2_1_utcl2_harvest, + .mode2_save_regs = gfxhub_v2_1_save_regs, + .mode2_restore_regs = gfxhub_v2_1_restore_regs, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c index b61a8ddec7ef0..2dbbda17848b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -94,8 +94,11 @@ sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl, int r = 0; struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { + if (adev->gfxhub.funcs->mode2_save_regs) + adev->gfxhub.funcs->mode2_save_regs(adev); r = sienna_cichlid_mode2_suspend_ip(adev); + } return r; } @@ -151,6 +154,8 @@ static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev) } /* Reinit GFXHUB */ + if (adev->gfxhub.funcs->mode2_restore_regs) + adev->gfxhub.funcs->mode2_restore_regs(adev); adev->gfxhub.funcs->init(adev); r = adev->gfxhub.funcs->gart_enable(adev); if (r) { diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h index 6d79630c2d16f..b7c1445b0bed7 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h @@ -3131,6 +3131,8 @@ #define mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32_BASE_IDX 0 #define mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32 0x15cc #define mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32_BASE_IDX 0 +#define mmGCVM_DEBUG 0x15cd +#define mmGCVM_DEBUG_BASE_IDX 0 #define mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32 0x15ce #define mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32_BASE_IDX 0 #define mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32 0x15cf @@ -3153,6 +3155,8 @@ #define mmGCVM_L2_BANK_SELECT_RESERVED_CID2_BASE_IDX 0 #define mmGCVM_L2_CACHE_PARITY_CNTL 0x15d8 
#define mmGCVM_L2_CACHE_PARITY_CNTL_BASE_IDX 0 +#define mmGCVM_L2_IH_LOG_CNTL 0x15d9 +#define mmGCVM_L2_IH_LOG_CNTL_BASE_IDX 0 #define mmGCVM_L2_CNTL5 0x15dc #define mmGCVM_L2_CNTL5_BASE_IDX 0 #define mmGCVM_L2_GCR_CNTL 0x15dd From eac97f84a71b9672b9ed5e5c0a2eded51cf1e0b8 Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Fri, 24 Jun 2022 11:59:21 +0800 Subject: [PATCH 15/79] drm/amdgpu: revert context to stop engine before mode2 reset For some hangs caused by slow tests, the engine cannot be stopped, which may cause a resume failure after reset. In this case, force-halt the engine by reverting the context addresses. Signed-off-by: Victor Zhao Acked-by: Andrey Grodzovsky --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h | 1 + drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 36 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c | 2 ++ 4 files changed, 40 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fdb9e10623e00..ebb722811dcf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5097,6 +5097,7 @@ static void amdgpu_device_recheck_guilty_jobs( /* set guilty */ drm_sched_increase_karma(s_job); + amdgpu_reset_prepare_hwcontext(adev, reset_context); retry: /* do hw reset */ if (amdgpu_sriov_vf(adev)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index f8036f2b100e4..c7b44aeb671b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -37,6 +37,7 @@ struct amdgpu_gfxhub_funcs { void (*utcl2_harvest)(struct amdgpu_device *adev); void (*mode2_save_regs)(struct amdgpu_device *adev); void (*mode2_restore_regs)(struct amdgpu_device *adev); + void (*halt)(struct amdgpu_device *adev); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index 51cf8acd2d796..8cf53e039c115 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -646,6 +646,41 @@ static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, adev->gmc.MC_VM_MX_L1_TLB_CNTL); } +static void gfxhub_v2_1_halt(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + int i; + uint32_t tmp; + int time = 1000; + + gfxhub_v2_1_set_fault_enable_default(adev, false); + + for (i = 0; i <= 14; i++) { + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, ~0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, ~0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + 0); + } + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); + while ((tmp & (GRBM_STATUS2__EA_BUSY_MASK | + GRBM_STATUS2__EA_LINK_BUSY_MASK)) != 0 && + time) { + udelay(100); + time--; + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); + } + + if (!time) { + DRM_WARN("failed to wait for GRBM(EA) idle\n"); + } +} + const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .get_fb_location = gfxhub_v2_1_get_fb_location, .get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset, @@ -658,4 +693,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .utcl2_harvest = gfxhub_v2_1_utcl2_harvest, .mode2_save_regs = gfxhub_v2_1_save_regs, .mode2_restore_regs = gfxhub_v2_1_restore_regs, + .halt = gfxhub_v2_1_halt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c index 2dbbda17848b3..7aa570c1ce4a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -97,6 +97,8 @@ sienna_cichlid_mode2_prepare_hwcontext(struct 
amdgpu_reset_control *reset_ctl, if (!amdgpu_sriov_vf(adev)) { if (adev->gfxhub.funcs->mode2_save_regs) adev->gfxhub.funcs->mode2_save_regs(adev); + if (adev->gfxhub.funcs->halt) + adev->gfxhub.funcs->halt(adev); r = sienna_cichlid_mode2_suspend_ip(adev); } From af0029c7a35d0ab1aeff4d897ae5bd00a3847d3f Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Fri, 24 Jun 2022 12:00:06 +0800 Subject: [PATCH 16/79] drm/amdgpu: reduce reset time In multi container use case, reset time is important, so skip ring tests and cp halt wait during ip suspending for reset as they are going to fail and cost more time on reset v2: add a hang flag to indicate the reset comes from a job timeout, skip ring test and cp halt wait in this case v3: move hang flag to adev Signed-off-by: Victor Zhao Acked-by: Andrey Grodzovsky --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 9 +++++++-- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6a94eb5acab81..1372e2b475418 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1100,6 +1100,7 @@ struct amdgpu_device { struct work_struct reset_work; uint32_t amdgpu_reset_level_mask; + bool job_hang; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 222d3d7ea0767..454a78ba60d43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -477,7 +477,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], RESET_QUEUES, 0, 0); - if (adev->gfx.kiq.ring.sched.ready) + if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang) r = amdgpu_ring_test_helper(kiq_ring); 
spin_unlock(&adev->gfx.kiq.ring_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index fca04289a2d1c..723721bdd6bf9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -49,6 +49,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } memset(&ti, 0, sizeof(struct amdgpu_task_info)); + adev->job_hang = true; if (amdgpu_gpu_recovery && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { @@ -83,6 +84,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } exit: + adev->job_hang = false; drm_dev_exit(idx); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 53bc585246169..f61d7c7c9d262 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5994,6 +5994,9 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); } + if (adev->job_hang && !enable) + return 0; + for (i = 0; i < adev->usec_timeout; i++) { if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) break; @@ -7592,8 +7595,10 @@ static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], PREEMPT_QUEUES, 0, 0); - - return amdgpu_ring_test_helper(kiq_ring); + if (!adev->job_hang) + return amdgpu_ring_test_helper(kiq_ring); + else + return 0; } #endif From dd1adf9e47a179f6ff159e938a7a681fec850822 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Thu, 4 Aug 2022 16:44:56 +0800 Subject: [PATCH 17/79] drm/amd/display: change family id name for DCN314 GC version is 11.0.1 rather than 11.0.2 Signed-off-by: Yifan Zhang Reviewed-by: Roman Li --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 4 ++-- drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 4 
++-- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 +- drivers/gpu/drm/amd/display/include/dal_asic_id.h | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 08b1d10e021c9..79ca880c181bb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -802,7 +802,7 @@ static int get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_ty add_gfx10_1_modifiers(adev, mods, &size, &capacity); break; case AMDGPU_FAMILY_GC_11_0_0: - case AMDGPU_FAMILY_GC_11_0_2: + case AMDGPU_FAMILY_GC_11_0_1: add_gfx11_modifiers(adev, mods, &size, &capacity); break; } @@ -1638,7 +1638,7 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane, } break; case AMDGPU_FAMILY_GC_11_0_0: - case AMDGPU_FAMILY_GC_11_0_2: + case AMDGPU_FAMILY_GC_11_0_1: switch (AMD_FMT_MOD_GET(TILE, modifier)) { case AMD_FMT_MOD_TILE_GFX11_256K_R_X: case AMD_FMT_MOD_TILE_GFX9_64K_R_X: diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 4c76091fd1f21..f276abb63bcd7 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -337,7 +337,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p break; } - case AMDGPU_FAMILY_GC_11_0_2: { + case AMDGPU_FAMILY_GC_11_0_1: { struct clk_mgr_dcn314 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL); if (clk_mgr == NULL) { @@ -397,7 +397,7 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base) dcn32_clk_mgr_destroy(clk_mgr); break; - case AMDGPU_FAMILY_GC_11_0_2: + case AMDGPU_FAMILY_GC_11_0_1: dcn314_clk_mgr_destroy(clk_mgr); 
break; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 382daa5c9ec15..4e778f5140b81 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4332,7 +4332,7 @@ bool dc_is_dmub_outbox_supported(struct dc *dc) !dc->debug.dpia_debug.bits.disable_dpia) return true; - if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2 && + if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 && !dc->debug.dpia_debug.bits.disable_dpia) return true; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 6911fea69a521..89f80a3bb9f2e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3379,7 +3379,7 @@ bool dc_link_setup_psr(struct dc_link *link, switch(link->ctx->asic_id.chip_family) { case FAMILY_YELLOW_CARP: case AMDGPU_FAMILY_GC_10_3_6: - case AMDGPU_FAMILY_GC_11_0_2: + case AMDGPU_FAMILY_GC_11_0_1: if(!dc->debug.disable_z10) psr_context->psr_level.bits.SKIP_CRTC_DISABLE = false; break; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index e2d3b0beaa39a..59b07f743d99d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -169,7 +169,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) if (ASICREV_IS_GC_11_0_2(asic_id.hw_internal_rev)) dc_version = DCN_VERSION_3_21; break; - case AMDGPU_FAMILY_GC_11_0_2: + case AMDGPU_FAMILY_GC_11_0_1: dc_version = DCN_VERSION_3_14; break; default: diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 704a610d5ffe8..dfbdc7fd3adf9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -2165,7 +2165,7 @@ static bool 
dcn31_resource_construct( pool->base.usb4_dpia_count = 4; } - if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2) + if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1) pool->base.usb4_dpia_count = 4; /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index ab06c7fc74524..e054f3494087f 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -244,7 +244,7 @@ enum { #define ASICREV_IS_GC_10_3_7(eChipRev) ((eChipRev >= GC_10_3_7_A0) && (eChipRev < GC_10_3_7_UNKNOWN)) #define AMDGPU_FAMILY_GC_11_0_0 145 -#define AMDGPU_FAMILY_GC_11_0_2 148 +#define AMDGPU_FAMILY_GC_11_0_1 148 #define GC_11_0_0_A0 0x1 #define GC_11_0_2_A0 0x10 #define GC_11_UNKNOWN 0xFF From 201778cd2156f30690c0054d0c395b922c350489 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 4 Aug 2022 14:01:04 -0100 Subject: [PATCH 18/79] Documentation/amdgpu_dm: Add DM color correction documentation AMDGPU DM maps DRM color management properties (degamma, ctm and gamma) to DC color correction entities. Part of this mapping is already documented as code comments and can be converted as kernel docs. v2: - rebase to amd-staging-drm-next - fix typos (Tales) - undo kernel-docs inside functions (Tales) Signed-off-by: Melissa Wen Reviewed-by: Harry Wentland Reviewed-by: Tales Aparecida Reviewed-by: Rodrigo Siqueira --- .../gpu/amdgpu/display/display-manager.rst | 9 ++ .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 109 +++++++++++++----- 2 files changed, 90 insertions(+), 28 deletions(-) diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst b/Documentation/gpu/amdgpu/display/display-manager.rst index 7ce31f89d9a09..b1b0f11aed831 100644 --- a/Documentation/gpu/amdgpu/display/display-manager.rst +++ b/Documentation/gpu/amdgpu/display/display-manager.rst @@ -40,3 +40,12 @@ Atomic Implementation .. 
kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c :functions: amdgpu_dm_atomic_check amdgpu_dm_atomic_commit_tail + +Color Management Properties +=========================== + +.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c + :doc: overview + +.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c + :internal: diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index a71177305bcdf..a4cb23d059bd6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -29,7 +29,9 @@ #include "modules/color/color_gamma.h" #include "basics/conversion.h" -/* +/** + * DOC: overview + * * The DC interface to HW gives us the following color management blocks * per pipe (surface): * @@ -71,8 +73,8 @@ #define MAX_DRM_LUT_VALUE 0xFFFF -/* - * Initialize the color module. +/** + * amdgpu_dm_init_color_mod - Initialize the color module. * * We're not using the full color module, only certain components. * Only call setup functions for components that we need. @@ -82,7 +84,14 @@ void amdgpu_dm_init_color_mod(void) setup_x_points_distribution(); } -/* Extracts the DRM lut and lut size from a blob. */ +/** + * __extract_blob_lut - Extracts the DRM lut and lut size from a blob. + * @blob: DRM color mgmt property blob + * @size: lut size + * + * Returns: + * DRM LUT or NULL + */ static const struct drm_color_lut * __extract_blob_lut(const struct drm_property_blob *blob, uint32_t *size) { @@ -90,13 +99,18 @@ __extract_blob_lut(const struct drm_property_blob *blob, uint32_t *size) return blob ? (struct drm_color_lut *)blob->data : NULL; } -/* - * Return true if the given lut is a linear mapping of values, i.e. it acts - * like a bypass LUT. 
+/** + * __is_lut_linear - check if the given lut is a linear mapping of values + * @lut: given lut to check values + * @size: lut size * * It is considered linear if the lut represents: - * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in - * [0, MAX_COLOR_LUT_ENTRIES) + * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in [0, + * MAX_COLOR_LUT_ENTRIES) + * + * Returns: + * True if the given lut is a linear mapping of values, i.e. it acts like a + * bypass LUT. Otherwise, false. */ static bool __is_lut_linear(const struct drm_color_lut *lut, uint32_t size) { @@ -119,9 +133,13 @@ static bool __is_lut_linear(const struct drm_color_lut *lut, uint32_t size) return true; } -/* - * Convert the drm_color_lut to dc_gamma. The conversion depends on the size - * of the lut - whether or not it's legacy. +/** + * __drm_lut_to_dc_gamma - convert the drm_color_lut to dc_gamma. + * @lut: DRM lookup table for color conversion + * @gamma: DC gamma to set entries + * @is_legacy: legacy or atomic gamma + * + * The conversion depends on the size of the lut - whether or not it's legacy. */ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut, struct dc_gamma *gamma, bool is_legacy) @@ -154,8 +172,11 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut, } } -/* - * Converts a DRM CTM to a DC CSC float matrix. +/** + * __drm_ctm_to_dc_matrix - converts a DRM CTM to a DC CSC float matrix + * @ctm: DRM color transformation matrix + * @matrix: DC CSC float matrix + * * The matrix needs to be a 3x4 (12 entry) matrix. */ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, @@ -189,7 +210,18 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, } } -/* Calculates the legacy transfer function - only for sRGB input space. 
*/ +/** + * __set_legacy_tf - Calculates the legacy transfer function + * @func: transfer function + * @lut: lookup table that defines the color space + * @lut_size: size of respective lut + * @has_rom: if ROM can be used for hardcoded curve + * + * Only for sRGB input space + * + * Returns: + * 0 in case of success, -ENOMEM if fails + */ static int __set_legacy_tf(struct dc_transfer_func *func, const struct drm_color_lut *lut, uint32_t lut_size, bool has_rom) @@ -218,7 +250,16 @@ static int __set_legacy_tf(struct dc_transfer_func *func, return res ? 0 : -ENOMEM; } -/* Calculates the output transfer function based on expected input space. */ +/** + * __set_output_tf - calculates the output transfer function based on expected input space. + * @func: transfer function + * @lut: lookup table that defines the color space + * @lut_size: size of respective lut + * @has_rom: if ROM can be used for hardcoded curve + * + * Returns: + * 0 in case of success. -ENOMEM if fails. + */ static int __set_output_tf(struct dc_transfer_func *func, const struct drm_color_lut *lut, uint32_t lut_size, bool has_rom) @@ -262,7 +303,16 @@ static int __set_output_tf(struct dc_transfer_func *func, return res ? 0 : -ENOMEM; } -/* Caculates the input transfer function based on expected input space. */ +/** + * __set_input_tf - calculates the input transfer function based on expected + * input space. + * @func: transfer function + * @lut: lookup table that defines the color space + * @lut_size: size of respective lut. + * + * Returns: + * 0 in case of success. -ENOMEM if fails. 
+ */ static int __set_input_tf(struct dc_transfer_func *func, const struct drm_color_lut *lut, uint32_t lut_size) { @@ -285,13 +335,14 @@ static int __set_input_tf(struct dc_transfer_func *func, } /** - * amdgpu_dm_verify_lut_sizes + * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes * @crtc_state: the DRM CRTC state * - * Verifies that the Degamma and Gamma LUTs attached to the |crtc_state| are of - * the expected size. + * Verifies that the Degamma and Gamma LUTs attached to the &crtc_state + * are of the expected size. * - * Returns 0 on success. + * Returns: + * 0 on success. -EINVAL if any lut sizes are invalid. */ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) { @@ -327,9 +378,9 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) * of the HW blocks as long as the CRTC CTM always comes before the * CRTC RGM and after the CRTC DGM. * - * The CRTC RGM block will be placed in the RGM LUT block if it is non-linear. - * The CRTC DGM block will be placed in the DGM LUT block if it is non-linear. - * The CRTC CTM will be placed in the gamut remap block if it is non-linear. + * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear. + * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear. + * - The CRTC CTM will be placed in the gamut remap block if it is non-linear. * * The RGM block is typically more fully featured and accurate across * all ASICs - DCE can't support a custom non-linear CRTC DGM. @@ -338,7 +389,8 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) * management at once we have to either restrict the usage of CRTC properties * or blend adjustments together. * - * Returns 0 on success. + * Returns: + * 0 on success. Error code if setup fails. 
*/ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) { @@ -393,7 +445,7 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) if (r) return r; } else if (has_regamma) { - /* CRTC RGM goes into RGM LUT. */ + /* If atomic regamma, CRTC RGM goes into RGM LUT. */ stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; @@ -450,9 +502,10 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) * * Update the underlying dc_stream_state's input transfer function (ITF) in * preparation for hardware commit. The transfer function used depends on - * the prepartion done on the stream for color management. + * the preparation done on the stream for color management. * - * Returns 0 on success. + * Returns: + * 0 on success. -ENOMEM if mem allocation fails. */ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct dc_plane_state *dc_plane_state) From 9288cef96232c2f66826bda98498173b64483d9f Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 4 Aug 2022 14:01:05 -0100 Subject: [PATCH 19/79] Documentation/amdgpu/display: add DC color caps info Add details about color correction capabilities and explain a bit about differences between DC hw generations and also how they are mapped between DRM and DC interface. Two schemas for DCN 2.0 and 3.0 (converted to svg from the original png) is included to illustrate it. They were obtained from a discussion[1] in the amd-gfx mailing list. 
[1] https://lore.kernel.org/amd-gfx/20220422142811.dm6vtk6v64jcwydk@mail.igalia.com/ v1: - remove redundant comments (Harry) - fix typos (Harry) v2: - reword introduction of color section - add co-dev tag for Harry - who provided most of the info - fix typos (Tales) - describe missing struct parameters (Tales and Siqueira) Co-developed-by: Harry Wentland Signed-off-by: Harry Wentland Signed-off-by: Melissa Wen Reviewed-by: Tales Aparecida Reviewed-by: Rodrigo Siqueira --- .../amdgpu/display/dcn2_cm_drm_current.svg | 1370 +++++++++++++++ .../amdgpu/display/dcn3_cm_drm_current.svg | 1529 +++++++++++++++++ .../gpu/amdgpu/display/display-manager.rst | 34 + drivers/gpu/drm/amd/display/dc/dc.h | 77 +- 4 files changed, 2997 insertions(+), 13 deletions(-) create mode 100644 Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg create mode 100644 Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg diff --git a/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg b/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg new file mode 100644 index 0000000000000..315ffc5a1a4b0 --- /dev/null +++ b/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg @@ -0,0 +1,1370 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Matrix + 1D LUT + 3D LUT + Unpacking + Other + drm_framebuffer + format + drm_plane + drm_crtc + Stream + MPC + DPP + + Blender + Degamma + CTM + Gamma + format + bias_and_scale + color space matrix + input_csc_color_matrix + in_transfer_func + hdr_mult + gamut_remap_matrix + in_shaper_func + lut3d_func + blend_tf + Blender + gamut_remap_matrix + func_shaper + lut3d_func + out_transfer_func + csc_color_matrix + bit_depth_param + clamping + output_color_space + Plane + Legend + DCN 2.0 + DC Interface + DRM Interface + + CNVC + Input CSC + DeGammaRAM and ROM(sRGB, BT2020 + HDR Multiply + Gamut Remap + Shaper LUTRAM + 3D 
LUTRAM + Blend Gamma + Blender + GammaRAM + OCSC + + + color_encoding + + pixel_blend_mode + + color_range + + + + + + + + + + + + + + diff --git a/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg b/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg new file mode 100644 index 0000000000000..7299ee9b6d649 --- /dev/null +++ b/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg @@ -0,0 +1,1529 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Matrix + 1D LUT + 3D LUT + Unpacking + Other + drm_framebuffer + format + drm_plane + drm_crtc + Stream + MPC + DPP + + Blender + Degamma + CTM + Gamma + format + bias_and_scale + color space matrix + input_csc_color_matrix + in_transfer_func + hdr_mult + gamut_remap_matrix + in_shaper_func + lut3d_func + blend_tf + Blender + gamut_remap_matrix + func_shaper + lut3d_func + out_transfer_func + csc_color_matrix + bit_depth_param + clamping + output_color_space + Plane + Legend + DCN 3.0 + DC Interface + DRM Interface + + CNVC + Input CSC + DeGammaROM(sRGB, BT2020, Gamma 2.2,PQ, HLG) + Post CSC + Gamma Correction + HDR Multiply + Gamut Remap + Shaper LUTRAM + 3D LUTRAM + Blend Gamma + Blender + Gamut Remap + Shaper LUTRAM + 3D LUTRAM + GammaRAM + OCSC + + + color_encoding + + pixel_blend_mode + + color_range + + + + + + + + + + + + + + + + + + diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst b/Documentation/gpu/amdgpu/display/display-manager.rst index b1b0f11aed831..88e2c08c70144 100644 --- a/Documentation/gpu/amdgpu/display/display-manager.rst +++ b/Documentation/gpu/amdgpu/display/display-manager.rst @@ -49,3 +49,37 @@ Color Management Properties .. 
kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c :internal: + + +DC Color Capabilities between DCN generations +--------------------------------------------- + +DRM/KMS framework defines three CRTC color correction properties: degamma, +color transformation matrix (CTM) and gamma, and two properties for degamma and +gamma LUT sizes. AMD DC programs some of the color correction features +pre-blending but DRM/KMS has no per-plane color correction properties. + +In general, the DRM CRTC color properties are programmed to DC, as follows: +CRTC gamma after blending, and CRTC degamma pre-blending. Although CTM is +programmed after blending, it is mapped to DPP hw blocks (pre-blending). Other +color caps available in the hw are not currently exposed by DRM interface and +are bypassed. + +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/dc.h + :doc: color-management-caps + +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/dc.h + :internal: + +The color pipeline has undergone major changes between DCN hardware +generations. What's possible to do before and after blending depends on +hardware capabilities, as illustrated below by the DCN 2.0 and DCN 3.0 families +schemas. + +**DCN 2.0 family color caps and mapping** + +.. kernel-figure:: dcn2_cm_drm_current.svg + +**DCN 3.0 family color caps and mapping** + +.. kernel-figure:: dcn3_cm_drm_current.svg diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index adef5583963d4..e697bad17573d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -118,7 +118,26 @@ struct dc_plane_cap { uint32_t min_height; }; -// Color management caps (DPP and MPC) +/** + * DOC: color-management-caps + * + * **Color management caps (DPP and MPC)** + * + * Modules/color calculates various color operations which are translated to + * abstracted HW.
DCE 5-12 had almost no important changes, but starting with + * DCN1, every new generation comes with fairly major differences in color + * pipeline. Therefore, we abstract color pipe capabilities so modules/DM can + * decide mapping to HW block based on logical capabilities. + */ + +/** + * struct rom_curve_caps - predefined transfer function caps for degamma and regamma + * @srgb: RGB color space transfer func + * @bt2020: BT.2020 transfer func + * @gamma2_2: standard gamma + * @pq: perceptual quantizer transfer function + * @hlg: hybrid log–gamma transfer function + */ struct rom_curve_caps { uint16_t srgb : 1; uint16_t bt2020 : 1; @@ -127,36 +146,68 @@ struct rom_curve_caps { uint16_t hlg : 1; }; +/** + * struct dpp_color_caps - color pipeline capabilities for display pipe and + * plane blocks + * + * @dcn_arch: all DCE generations treated the same + * @input_lut_shared: shared with DGAM. Input LUT is different than most LUTs, + * just plain 256-entry lookup + * @icsc: input color space conversion + * @dgam_ram: programmable degamma LUT + * @post_csc: post color space conversion, before gamut remap + * @gamma_corr: degamma correction + * @hw_3d_lut: 3D LUT support. It implies a shaper LUT before. 
It may be shared + * with MPC by setting mpc:shared_3d_lut flag + * @ogam_ram: programmable out/blend gamma LUT + * @ocsc: output color space conversion + * @dgam_rom_for_yuv: pre-defined degamma LUT for YUV planes + * @dgam_rom_caps: pre-defined curve caps for degamma 1D LUT + * @ogam_rom_caps: pre-defined curve caps for regamma 1D LUT + * + * Note: hdr_mult and gamut remap (CTM) are always available in DPP (in that order) + */ struct dpp_color_caps { - uint16_t dcn_arch : 1; // all DCE generations treated the same - // input lut is different than most LUTs, just plain 256-entry lookup - uint16_t input_lut_shared : 1; // shared with DGAM + uint16_t dcn_arch : 1; + uint16_t input_lut_shared : 1; uint16_t icsc : 1; uint16_t dgam_ram : 1; - uint16_t post_csc : 1; // before gamut remap + uint16_t post_csc : 1; uint16_t gamma_corr : 1; - - // hdr_mult and gamut remap always available in DPP (in that order) - // 3d lut implies shaper LUT, - // it may be shared with MPC - check MPC:shared_3d_lut flag uint16_t hw_3d_lut : 1; - uint16_t ogam_ram : 1; // blnd gam + uint16_t ogam_ram : 1; uint16_t ocsc : 1; uint16_t dgam_rom_for_yuv : 1; struct rom_curve_caps dgam_rom_caps; struct rom_curve_caps ogam_rom_caps; }; +/** + * struct mpc_color_caps - color pipeline capabilities for multiple pipe and + * plane combined blocks + * + * @gamut_remap: color transformation matrix + * @ogam_ram: programmable out gamma LUT + * @ocsc: output color space conversion matrix + * @num_3dluts: MPC 3D LUT; always assumes a preceding shaper LUT + * @shared_3d_lut: shared 3D LUT flag.
Can be either DPP or MPC, but single + * instance + * @ogam_rom_caps: pre-defined curve caps for regamma 1D LUT + */ struct mpc_color_caps { uint16_t gamut_remap : 1; uint16_t ogam_ram : 1; uint16_t ocsc : 1; - uint16_t num_3dluts : 3; //3d lut always assumes a preceding shaper LUT - uint16_t shared_3d_lut:1; //can be in either DPP or MPC, but single instance - + uint16_t num_3dluts : 3; + uint16_t shared_3d_lut:1; struct rom_curve_caps ogam_rom_caps; }; +/** + * struct dc_color_caps - color pipes capabilities for DPP and MPC hw blocks + * @dpp: color pipes caps for DPP + * @mpc: color pipes caps for MPC + */ struct dc_color_caps { struct dpp_color_caps dpp; struct mpc_color_caps mpc; From cba4fc68280c6ea55a44a884cca83b6875966bb4 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 4 Aug 2022 14:01:06 -0100 Subject: [PATCH 20/79] drm/amd/display: add doc entries for MPC blending configuration Describe structs and enums used to set blend mode properties to MPC blocks. Some pieces of information are already available as code comments, and were just formatted. Others were collected and summarised from discussions on AMD issue tracker[1][2]. [1] https://gitlab.freedesktop.org/drm/amd/-/issues/1734 [2] https://gitlab.freedesktop.org/drm/amd/-/issues/1769 v2: - fix typos (Tales) - add MPCC to MPC entry in the glossary Signed-off-by: Melissa Wen Reviewed-by: Tales Aparecida Reviewed-by: Rodrigo Siqueira --- .../gpu/amdgpu/display/dc-glossary.rst | 2 +- drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 91 ++++++++++++++++--- 2 files changed, 78 insertions(+), 15 deletions(-) diff --git a/Documentation/gpu/amdgpu/display/dc-glossary.rst b/Documentation/gpu/amdgpu/display/dc-glossary.rst index 116f5f0942fd7..0b0ffd428dd2e 100644 --- a/Documentation/gpu/amdgpu/display/dc-glossary.rst +++ b/Documentation/gpu/amdgpu/display/dc-glossary.rst @@ -170,7 +170,7 @@ consider asking in the amdgfx and update this page.
MC Memory Controller - MPC + MPC/MPCC Multiple pipes and plane combine MPO diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 5097037e39625..8d86159d9de09 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -22,6 +22,16 @@ * */ +/** + * DOC: mpc-overview + * + * Multiple Pipe/Plane Combined (MPC) is a component in the hardware pipeline + * that performs blending of multiple planes, using global and per-pixel alpha. + * It also performs post-blending color correction operations according to the + * hardware capabilities, such as color transformation matrix and gamma 1D and + * 3D LUT. + */ + #ifndef __DC_MPCC_H__ #define __DC_MPCC_H__ @@ -48,14 +58,39 @@ enum mpcc_blend_mode { MPCC_BLEND_MODE_TOP_BOT_BLENDING }; +/** + * enum mpcc_alpha_blend_mode - define the alpha blend mode regarding pixel + * alpha and plane alpha values + */ enum mpcc_alpha_blend_mode { + /** + * @MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA: per pixel alpha using DPP + * alpha value + */ MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA, + /** + * @MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN: per + * pixel alpha using DPP alpha value multiplied by a global gain (plane + * alpha) + */ MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN, + /** + * @MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA: global alpha value, ignores + * pixel alpha and consider only plane alpha + */ MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA }; -/* - * MPCC blending configuration +/** + * struct mpcc_blnd_cfg - MPCC blending configuration + * + * @black_color: background color + * @alpha_mode: alpha blend mode (MPCC_ALPHA_BLND_MODE) + * @pre_multiplied_alpha: whether pixel color values were pre-multiplied by the + * alpha channel (MPCC_ALPHA_MULTIPLIED_MODE) + * @global_gain: used when blend mode considers both pixel alpha and plane + * alpha value and assumes the global alpha value. 
+ * @global_alpha: plane alpha value */ struct mpcc_blnd_cfg { struct tg_color black_color; /* background color */ @@ -107,8 +142,15 @@ struct mpc_dwb_flow_control { int flow_ctrl_cnt1; }; -/* - * MPCC connection and blending configuration for a single MPCC instance. +/** + * struct mpcc - MPCC connection and blending configuration for a single MPCC instance. + * @mpcc_id: MPCC physical instance + * @dpp_id: DPP input to this MPCC + * @mpcc_bot: pointer to bottom layer MPCC. NULL when not connected. + * @blnd_cfg: the blending configuration for this MPCC + * @sm_cfg: stereo mix setting for this MPCC + * @shared_bottom: if MPCC output to both OPP and DWB endpoints, true. Otherwise, false. + * * This struct is used as a node in an MPC tree. */ struct mpcc { @@ -120,8 +162,12 @@ struct mpcc { bool shared_bottom; /* TRUE if MPCC output to both OPP and DWB endpoints, else FALSE */ }; -/* - * MPC tree represents all MPCC connections for a pipe. +/** + * struct mpc_tree - MPC tree represents all MPCC connections for a pipe. + * + * @opp_id: the OPP instance that owns this MPC tree + * @opp_list: the top MPCC layer of the MPC tree that outputs to OPP endpoint + * */ struct mpc_tree { int opp_id; /* The OPP instance that owns this MPC tree */ @@ -149,13 +195,18 @@ struct mpcc_state { uint32_t busy; }; +/** + * struct mpc_funcs - funcs + */ struct mpc_funcs { void (*read_mpcc_state)( struct mpc *mpc, int mpcc_inst, struct mpcc_state *s); - /* + /** + * @insert_plane: + * * Insert DPP into MPC tree based on specified blending position. * Only used for planes that are part of blending chain for OPP output * @@ -180,7 +231,9 @@ struct mpc_funcs { int dpp_id, int mpcc_id); - /* + /** + * @remove_mpcc: + * * Remove a specified MPCC from the MPC tree. * * Parameters: @@ -195,7 +248,9 @@ struct mpc_funcs { struct mpc_tree *tree, struct mpcc *mpcc); - /* + /** + * @mpc_init: + * * Reset the MPCC HW status by disconnecting all muxes. 
* * Parameters: @@ -208,7 +263,9 @@ struct mpc_funcs { struct mpc *mpc, unsigned int mpcc_id); - /* + /** + * @update_blending: + * * Update the blending configuration for a specified MPCC. * * Parameters: @@ -223,7 +280,9 @@ struct mpc_funcs { struct mpcc_blnd_cfg *blnd_cfg, int mpcc_id); - /* + /** + * @cursor_lock: + * * Lock cursor updates for the specified OPP. * OPP defines the set of MPCC that are locked together for cursor. * @@ -239,8 +298,10 @@ struct mpc_funcs { int opp_id, bool lock); - /* - * Add DPP into 'secondary' MPC tree based on specified blending position. + /** + * @insert_plane_to_secondary: + * + * Add DPP into secondary MPC tree based on specified blending position. * Only used for planes that are part of blending chain for DWB output * * Parameters: @@ -264,7 +325,9 @@ struct mpc_funcs { int dpp_id, int mpcc_id); - /* + /** + * @remove_mpcc_from_secondary: + * * Remove a specified DPP from the 'secondary' MPC tree. * * Parameters: From d9d2b767ba95ff39631896430282526607cd9d56 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 4 Aug 2022 14:01:07 -0100 Subject: [PATCH 21/79] Documentation/gpu/amdgpu/amdgpu_dm: add DM docs for pixel blend mode AMD GPU display manager (DM) maps DRM pixel blend modes (None, Pre-multiplied, Coverage) to MPC hw blocks through blend configuration options. Describe relevant elements and how to set and test them to get the expected DRM blend mode on DCN hw. v2: - add ref tag (Tales) Signed-off-by: Melissa Wen Reviewed-by: Tales Aparecida Reviewed-by: Rodrigo Siqueira --- .../gpu/amdgpu/display/display-manager.rst | 98 +++++++++++++++++++ Documentation/gpu/drm-kms.rst | 2 + 2 files changed, 100 insertions(+) diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst b/Documentation/gpu/amdgpu/display/display-manager.rst index 88e2c08c70144..b7abb18cfc820 100644 --- a/Documentation/gpu/amdgpu/display/display-manager.rst +++ b/Documentation/gpu/amdgpu/display/display-manager.rst @@ -83,3 +83,101 @@ schemas. 
**DCN 3.0 family color caps and mapping** .. kernel-figure:: dcn3_cm_drm_current.svg + +Blend Mode Properties +===================== + +Pixel blend mode is a DRM plane composition property of :c:type:`drm_plane` used to +describe how pixels from a foreground plane (fg) are composited with the +background plane (bg). Here, we present main concepts of DRM blend mode to help +to understand how this property is mapped to AMD DC interface. See more about +this DRM property and the alpha blending equations in :ref:`DRM Plane +Composition Properties <plane_composition_properties>`. + +Basically, a blend mode sets the alpha blending equation for plane +composition that fits the mode in which the alpha channel affects the state of +pixel color values and, therefore, the resulting pixel color. For +example, consider the following elements of the alpha blending equation: + +- *fg.rgb*: Each of the RGB component values from the foreground's pixel. +- *fg.alpha*: Alpha component value from the foreground's pixel. +- *bg.rgb*: Each of the RGB component values from the background. +- *plane_alpha*: Plane alpha value set by the **plane "alpha" property**, see + more in :ref:`DRM Plane Composition Properties <plane_composition_properties>`. + +in the basic alpha blending equation:: + + out.rgb = alpha * fg.rgb + (1 - alpha) * bg.rgb + +the alpha channel value of each pixel in a plane is ignored and only the plane +alpha affects the resulting pixel color values. + +DRM has three blend modes to define the blend formula in the plane composition: + +* **None**: Blend formula that ignores the pixel alpha. + +* **Pre-multiplied**: Blend formula that assumes the pixel color values in a + plane were already pre-multiplied by its own alpha channel before storage. + +* **Coverage**: Blend formula that assumes the pixel color values were not + pre-multiplied with the alpha channel values.
+ +and pre-multiplied is the default pixel blend mode, that means, when no blend +mode property is created or defined, DRM considers the plane's pixels have +pre-multiplied color values. On IGT GPU tools, the kms_plane_alpha_blend test +provides a set of subtests to verify plane alpha and blend mode properties. + +The DRM blend mode and its elements are then mapped by AMDGPU display manager +(DM) to program the blending configuration of the Multiple Pipe/Plane Combined +(MPC), as follows: + +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h + :doc: mpc-overview + +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h + :functions: mpcc_blnd_cfg + +Therefore, the blending configuration for a single MPCC instance on the MPC +tree is defined by :c:type:`mpcc_blnd_cfg`, where +:c:type:`pre_multiplied_alpha` is the alpha pre-multiplied mode flag used to +set :c:type:`MPCC_ALPHA_MULTIPLIED_MODE`. It controls whether alpha is +multiplied (true/false), being only true for DRM pre-multiplied blend mode. +:c:type:`mpcc_alpha_blend_mode` defines the alpha blend mode regarding pixel +alpha and plane alpha values. It sets one of the three modes for +:c:type:`MPCC_ALPHA_BLND_MODE`, as described below. + +.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h + :functions: mpcc_alpha_blend_mode + +DM then maps the elements of `enum mpcc_alpha_blend_mode` to those in the DRM +blend formula, as follows: + +* *MPC pixel alpha* matches *DRM fg.alpha* as the alpha component value + from the plane's pixel +* *MPC global alpha* matches *DRM plane_alpha* when the pixel alpha should + be ignored and, therefore, pixel values are not pre-multiplied +* *MPC global gain* assumes *MPC global alpha* value when both *DRM + fg.alpha* and *DRM plane_alpha* participate in the blend equation + +In short, *fg.alpha* is ignored by selecting +:c:type:`MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA`.
On the other hand, (plane_alpha * +fg.alpha) component becomes available by selecting +:c:type:`MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN`. And the +:c:type:`MPCC_ALPHA_MULTIPLIED_MODE` defines if the pixel color values are +pre-multiplied by alpha or not. + +Blend configuration flow +------------------------ + +The alpha blending equation is configured from DRM to DC interface by the +following path: + +1. When updating a :c:type:`drm_plane_state <drm_plane_state>`, DM calls + :c:type:`fill_blending_from_plane_state()` that maps + :c:type:`drm_plane_state <drm_plane_state>` attributes to + :c:type:`dc_plane_info <dc_plane_info>` struct to be handled in the + OS-agnostic component (DC). + +2. On DC interface, :c:type:`struct mpcc_blnd_cfg <mpcc_blnd_cfg>` programs the + MPCC blend configuration considering the :c:type:`dc_plane_info + <dc_plane_info>` input from DPP. diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst index 6f9c064fd3234..b4377a545425b 100644 --- a/Documentation/gpu/drm-kms.rst +++ b/Documentation/gpu/drm-kms.rst @@ -532,6 +532,8 @@ Standard Plane Properties .. kernel-doc:: drivers/gpu/drm/drm_plane.c :doc: standard plane properties +.. _plane_composition_properties: + Plane Composition Properties ---------------------------- From a02987e43ead27ac5bf8a207d847e188e69d61ec Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Fri, 29 Jul 2022 15:05:58 +0800 Subject: [PATCH 22/79] drm/amdgpu: add GFX Clock Gating support for GC IP v11.0.1 Add below GFX Clock Gating supports: 1. GFX Coarse Grain Clock Gating(CGCG) 2. GFX Coarse grain light sleep/deep sleep(CGLS) 3. GFX Medium Grain Clock Gating(MGCG) 4. GFX Fine Grain Clock Gating(FGCG) 5. Repeater Fine Grain Clock Gating 6.
Perfmon Clock Gating Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang --- drivers/gpu/drm/amd/amdgpu/soc21.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 52816de5e17bf..b700c6cb14b48 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -586,6 +586,12 @@ static int soc21_common_early_init(void *handle) break; case IP_VERSION(11, 0, 1): adev->cg_flags = + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = From cfa71b1b7dff76a629c6f246bd64267508f670e6 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Fri, 29 Jul 2022 15:37:06 +0800 Subject: [PATCH 23/79] drm/amdgpu: enable GFX Clock Gating control for GC IP v11.0.1 Enable GFX CG gate/ungate control. Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 6fd71cb10e54a..e03618803a1c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5310,6 +5310,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); From dee6fa9ff1016f5a0dbdfbdb0627a1dbe2a84afa Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Fri, 5 Aug 2022 15:16:16 +0800 Subject: [PATCH 24/79] drm/amdgpu/pm: update smu driver interface header for SMU IP v13.0.4 Update the SmuMetrics_t definition and SMU driver interface version.
Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang --- .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h index 76f695a1d0658..ae2d337158f3b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define PMFW_DRIVER_IF_VERSION 4 +#define PMFW_DRIVER_IF_VERSION 5 typedef struct { int32_t value; @@ -197,6 +197,8 @@ typedef struct { uint16_t SkinTemp; uint16_t DeviceState; + uint16_t CurTemp; //[centi-Celsius] + uint16_t spare2; } SmuMetrics_t; typedef struct { From 2520fbfed359f32876cd96868562ecb489837707 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Fri, 5 Aug 2022 15:31:37 +0800 Subject: [PATCH 25/79] drm/amdgpu/pm: remove EnableGfxOff message for SMU IP v13.0.4 The EnableGfxOff message is dropped from the new PMFW ppsmc interface. 
Signed-off-by: Tim Huang Reviewed-by: Yifan Zhang --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 82d3718d83244..64665e8a86e4d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -71,7 +71,6 @@ static struct cmn2asic_msg_mapping smu_v13_0_4_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1), MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1), - MSG_MAP(EnableGfxOff, PPSMC_MSG_EnableGfxOff, 1), MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1), MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1), MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1), @@ -226,18 +225,6 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en) return ret; } -static int smu_v13_0_4_post_smu_init(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0; - - /* allow message will be sent after enable message */ - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_EnableGfxOff, NULL); - if (ret) - dev_err(adev->dev, "Failed to Enable GfxOff!\n"); - return ret; -} - static ssize_t smu_v13_0_4_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -1026,7 +1013,6 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = { .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_driver_table_location = smu_v13_0_set_driver_table_location, .gfx_off_control = smu_v13_0_gfx_off_control, - .post_init = smu_v13_0_4_post_smu_init, .mode2_reset = smu_v13_0_4_mode2_reset, .get_dpm_ultimate_freq = smu_v13_0_4_get_dpm_ultimate_freq, .od_edit_dpm_table = smu_v13_0_od_edit_dpm_table, From 58b3dee5bef135390fcf401f8b56eeed82a234ae Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Tue, 19 Jul 2022 14:07:41 -0400 
Subject: [PATCH 26/79] drm/amd/display: Use pixels per container logic for DCN314 DCCG dividers [Why] DP DSC YCbCr422 not working because dcn314_calculate_dccg_k1_k2_values does not account for two pixels per container [How] Replace the contents of dcn314_calculate_dccg_k1_k2_values with the code from the function dcn32_calculate_dccg_k1_k2_values Reviewed-by: Charlene Liu Acked-by: Tom Chung Signed-off-by: Daniel Miess Tested-by: Daniel Wheeler --- .../gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 29085be7a2fd3..30f65a8416dfd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -347,7 +347,10 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig { struct dc_stream_state *stream = pipe_ctx->stream; unsigned int odm_combine_factor = 0; + struct dc *dc = pipe_ctx->stream->ctx->dc; + bool two_pix_per_container = false; + two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); if (is_dp_128b_132b_signal(pipe_ctx)) { @@ -359,16 +362,13 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig else *k2_div = PIXEL_RATE_DIV_BY_4; } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + if (two_pix_per_container) { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_2; - } else if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { - *k1_div = PIXEL_RATE_DIV_BY_2; - *k2_div = PIXEL_RATE_DIV_BY_2; } else { - if (odm_combine_factor == 1) - *k2_div = PIXEL_RATE_DIV_BY_4; - else if (odm_combine_factor == 2) + *k1_div = PIXEL_RATE_DIV_BY_1; + *k2_div = PIXEL_RATE_DIV_BY_4; + if ((odm_combine_factor == 2) 
|| dc->debug.enable_dp_dig_pixel_rate_div_policy) *k2_div = PIXEL_RATE_DIV_BY_2; } } From b69d6ec4202e44ad45efe94c79a1f495aef47a5d Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 19 Jul 2022 16:28:50 -0400 Subject: [PATCH 27/79] drm/amd/display: Create FPU files for DCN314 DCN314 has multiple references to FPU operations inside the resource files, and we need to move that code to the DML folder. This commit creates the dcn314_fpu files and moves the bounding box operation to this file. Reviewed-by: Nicholas Kazlauskas Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler --- .../amd/display/dc/dcn314/dcn314_resource.c | 235 +--------------- .../amd/display/dc/dcn314/dcn314_resource.h | 3 + drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 + .../amd/display/dc/dml/dcn314/dcn314_fpu.c | 266 ++++++++++++++++++ .../amd/display/dc/dml/dcn314/dcn314_fpu.h | 37 +++ 5 files changed, 312 insertions(+), 231 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 64daa631497ee..34510db750a1f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -70,6 +70,7 @@ #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" #include "dml/dcn31/dcn31_fpu.h" +#include "dml/dcn314/dcn314_fpu.h" #include "dcn314/dcn314_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" @@ -132,155 +133,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define DC_LOGGER_INIT(logger) -#define DCN3_14_DEFAULT_DET_SIZE 384 -#define DCN3_14_MAX_DET_SIZE 384 -#define DCN3_14_MIN_COMPBUF_SIZE_KB 128 -#define DCN3_14_CRB_SEGMENT_SIZE_KB 64 -struct _vcs_dpi_ip_params_st dcn3_14_ip = { -
.VBlankNomDefaultUS = 668, - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 1, - .hostvm_enable = 1, - .hostvm_max_page_table_levels = 2, - .rob_buffer_size_kbytes = 64, - .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1792, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 32, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 10, - .dsc422_native_support = false, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 48, - .line_buffer_size_bits = 789504, - .max_line_buffer_lines = 12, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 46, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dynamic_metadata_vm_enabled = false, - 
.odm_combine_4to1_supported = false, - .dcc_supported = true, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { - /*TODO: correct dispclk/dppclk voltage level determination*/ - .clock_limits = { - { - .state = 0, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 600.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 1, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 2, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 3, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 4, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, - }, - }, - .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 442.0, - .sr_enter_plus_exit_z8_time_us = 560.0, - .writeback_latency_us = 12.0, - .dram_channel_width_bytes = 4, - .round_trip_ping_latency_dcfclk_cycles = 106, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 80.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, - .max_avg_sdp_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_percent = 60.0, - 
.fabric_datapath_to_dcn_data_return_bytes = 32, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .gpuvm_min_page_size_bytes = 4096, - .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = false, - .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, -}; - enum dcn31_clk_src_array_id { DCN31_CLK_SRC_PLL0, DCN31_CLK_SRC_PLL1, @@ -1912,88 +1764,9 @@ static struct dc_cap_funcs cap_funcs = { static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - struct clk_limit_table *clk_table = &bw_params->clk_table; - struct _vcs_dpi_voltage_scaling_st *clock_tmp = dcn3_14_soc._clock_tmp; - unsigned int i, closest_clk_lvl; - int max_dispclk_mhz = 0, max_dppclk_mhz = 0; - int j; - - // Default clock levels are used for diags, which may lead to overclocking. - if (!IS_DIAG_DC(dc->ctx->dce_environment)) { - - dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; - dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; - - if (bw_params->num_channels > 0) - dcn3_14_soc.num_chans = bw_params->num_channels; - - ASSERT(dcn3_14_soc.num_chans); - ASSERT(clk_table->num_entries); - - /* Prepass to find max clocks independent of voltage level. 
*/ - for (i = 0; i < clk_table->num_entries; ++i) { - if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; - if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; - } - - for (i = 0; i < clk_table->num_entries; i++) { - /* loop backwards*/ - for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) { - if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { - closest_clk_lvl = j; - break; - } - } - if (clk_table->num_entries == 1) { - /*smu gives one DPM level, let's take the highest one*/ - closest_clk_lvl = dcn3_14_soc.num_states - 1; - } - - clock_tmp[i].state = i; - - /* Clocks dependent on voltage level. */ - clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - if (clk_table->num_entries == 1 && - clock_tmp[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { - /*SMU fix not released yet*/ - clock_tmp[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; - } - clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - - if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio) - clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; - - /* Clocks independent of voltage level. */ - clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : - dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - - clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : - dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - - clock_tmp[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - clock_tmp[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - clock_tmp[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - clock_tmp[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - clock_tmp[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz; - } - for (i = 0; i < clk_table->num_entries; i++) - dcn3_14_soc.clock_limits[i] = clock_tmp[i]; - if (clk_table->num_entries) - dcn3_14_soc.num_states = clk_table->num_entries; - } - - if (max_dispclk_mhz) { - dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - } - - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31); - else - dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA); + DC_FP_START(); + dcn314_update_bw_bounding_box_fpu(dc, bw_params); + DC_FP_END(); } static struct resource_funcs dcn314_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h index c41108847ce08..0dd3153aa5c17 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h @@ -29,6 +29,9 @@ #include "core_types.h" +extern struct _vcs_dpi_ip_params_st dcn3_14_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc; + #define TO_DCN314_RES_POOL(pool)\ container_of(pool, struct dcn314_resource_pool, base) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 7f01462987d18..3469dd25fcc10 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile 
@@ -68,6 +68,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag) @@ -127,6 +128,7 @@ DML += dcn321/dcn321_fpu.o DML += dcn301/dcn301_fpu.o DML += dcn302/dcn302_fpu.o DML += dcn303/dcn303_fpu.o +DML += dcn314/dcn314_fpu.o DML += dsc/rc_calc_fpu.o DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c new file mode 100644 index 0000000000000..340c3ca223454 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "clk_mgr.h" +#include "resource.h" +#include "dcn314_fpu.h" +#include "dml/display_mode_vba.h" + +struct _vcs_dpi_ip_params_st dcn3_14_ip = { + .VBlankNomDefaultUS = 668, + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1792, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 48, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + 
.dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { + /*TODO: correct dispclk/dppclk voltage level determination*/ + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 600.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 186.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 1, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 2, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 3, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 371.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 4, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 625.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 
106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, +}; + + +void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct clk_limit_table *clk_table = &bw_params->clk_table; + struct _vcs_dpi_voltage_scaling_st *clock_limits = + dcn3_14_soc.clock_limits; + unsigned int i, closest_clk_lvl; + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; + int j; + + dc_assert_fp_enabled(); + + // Default clock levels are used for diags, which may lead to overclocking. + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + + dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; + + if (bw_params->num_channels > 0) + dcn3_14_soc.num_chans = bw_params->num_channels; + + ASSERT(dcn3_14_soc.num_chans); + ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. 
*/ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + if (clk_table->num_entries == 1) { + /*smu gives one DPM level, let's take the highest one*/ + closest_clk_lvl = dcn3_14_soc.num_states - 1; + } + + clock_limits[i].state = i; + + /* Clocks dependent on voltage level. */ + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + if (clk_table->num_entries == 1 && + clock_limits[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { + /*SMU fix not released yet*/ + clock_limits[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; + } + clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + + if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio) + clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + clock_limits[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : + dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + clock_limits[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + clock_limits[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + clock_limits[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + clock_limits[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + clock_limits[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) + dcn3_14_soc.clock_limits[i] = clock_limits[i]; + if (clk_table->num_entries) { + dcn3_14_soc.num_states = clk_table->num_entries; + } + } + + if (max_dispclk_mhz) { + dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + } + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31); + else + dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h new file mode 100644 index 0000000000000..79466d4f41851 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN314_FPU_H__ +#define __DCN314_FPU_H__ + +#define DCN3_14_DEFAULT_DET_SIZE 384 +#define DCN3_14_MAX_DET_SIZE 384 +#define DCN3_14_MIN_COMPBUF_SIZE_KB 128 +#define DCN3_14_CRB_SEGMENT_SIZE_KB 64 + +void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); + +#endif From 028075ccb00a8a3546595654c6e9cf93f11df024 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 19 Jul 2022 16:57:23 -0400 Subject: [PATCH 28/79] drm/amd/display: Move populate dml pipes from DCN314 to dml The function responsible for populating DML pipes has some FPU operations, and for this reason, it must be moved to the dml folder. This commit moves that function from the resource file to the FPU file under the dml folder.
Reviewed-by: Nicholas Kazlauskas Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler --- .../amd/display/dc/dcn314/dcn314_resource.c | 101 +--------------- .../amd/display/dc/dml/dcn314/dcn314_fpu.c | 110 ++++++++++++++++++ .../amd/display/dc/dml/dcn314/dcn314_fpu.h | 3 + 3 files changed, 117 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 34510db750a1f..13371331f888a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1651,109 +1651,16 @@ static struct clock_source *dcn31_clock_source_create( return NULL; } -static bool is_dual_plane(enum surface_pixel_format format) -{ - return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; -} - static int dcn314_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, bool fast_validate) { - int i, pipe_cnt; - struct resource_context *res_ctx = &context->res_ctx; - struct pipe_ctx *pipe; - bool upscaled = false; - - dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing; - - if (!res_ctx->pipe_ctx[i].stream) - continue; - pipe = &res_ctx->pipe_ctx[i]; - timing = &pipe->stream->timing; - - if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min - && pipe->stream->adjust.v_total_min > timing->v_total) - pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min; - - if (pipe->plane_state && - (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || - pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width)) - upscaled = true; - - /* - * Immediate flip can be set dynamically after enabling the plane. 
- * We need to require support for immediate flip or underflow can be - * intermittently experienced depending on peak b/w requirements. - */ - pipes[pipe_cnt].pipe.src.immediate_flip = true; - - pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; - pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; - pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - pipes[pipe_cnt].pipe.src.dcc_rate = 3; - pipes[pipe_cnt].dout.dsc_input_bpc = 0; - - if (pipes[pipe_cnt].dout.dsc_enable) { - switch (timing->display_color_depth) { - case COLOR_DEPTH_888: - pipes[pipe_cnt].dout.dsc_input_bpc = 8; - break; - case COLOR_DEPTH_101010: - pipes[pipe_cnt].dout.dsc_input_bpc = 10; - break; - case COLOR_DEPTH_121212: - pipes[pipe_cnt].dout.dsc_input_bpc = 12; - break; - default: - ASSERT(0); - break; - } - } - - pipe_cnt++; - } - context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE; - - dc->config.enable_4to1MPC = false; - if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { - if (is_dual_plane(pipe->plane_state->format) - && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { - dc->config.enable_4to1MPC = true; - } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) { - /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; - pipes[0].pipe.src.unbounded_req_mode = true; - } - } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count - && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) { - context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64; - } else if (context->stream_count >= 3 && upscaled) { - 
context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->stream) - continue; + int pipe_cnt; - if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine && - pipe->stream->apply_seamless_boot_optimization) { - - if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) { - context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1; - break; - } - } - } + DC_FP_START(); + pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, fast_validate); + DC_FP_END(); return pipe_cnt; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index 340c3ca223454..d52a16d280e98 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -26,7 +26,9 @@ #include "clk_mgr.h" #include "resource.h" +#include "dcn31/dcn31_hubbub.h" #include "dcn314_fpu.h" +#include "dml/dcn20/dcn20_fpu.h" #include "dml/display_mode_vba.h" struct _vcs_dpi_ip_params_st dcn3_14_ip = { @@ -264,3 +266,111 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p else dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA); } + +static bool is_dual_plane(enum surface_pixel_format format) +{ + return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; +} + +int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int i, pipe_cnt; + struct resource_context *res_ctx = &context->res_ctx; + struct pipe_ctx *pipe; + bool upscaled = false; + + dc_assert_fp_enabled(); + + dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) 
{ + struct dc_crtc_timing *timing; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + + if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min + && pipe->stream->adjust.v_total_min > timing->v_total) + pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min; + + if (pipe->plane_state && + (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || + pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width)) + upscaled = true; + + /* + * Immediate flip can be set dynamically after enabling the plane. + * We need to require support for immediate flip or underflow can be + * intermittently experienced depending on peak b/w requirements. + */ + pipes[pipe_cnt].pipe.src.immediate_flip = true; + + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; + pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; + pipes[pipe_cnt].pipe.src.gpuvm = true; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; + pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; + pipes[pipe_cnt].pipe.src.dcc_rate = 3; + pipes[pipe_cnt].dout.dsc_input_bpc = 0; + + if (pipes[pipe_cnt].dout.dsc_enable) { + switch (timing->display_color_depth) { + case COLOR_DEPTH_888: + pipes[pipe_cnt].dout.dsc_input_bpc = 8; + break; + case COLOR_DEPTH_101010: + pipes[pipe_cnt].dout.dsc_input_bpc = 10; + break; + case COLOR_DEPTH_121212: + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + break; + default: + ASSERT(0); + break; + } + } + + pipe_cnt++; + } + context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE; + + dc->config.enable_4to1MPC = false; + if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { + if (is_dual_plane(pipe->plane_state->format) + && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height 
<= 1080) { + dc->config.enable_4to1MPC = true; + } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) { + /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + pipes[0].pipe.src.unbounded_req_mode = true; + } + } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count + && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64; + } else if (context->stream_count >= 3 && upscaled) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine && + pipe->stream->apply_seamless_boot_optimization) { + + if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) { + context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1; + break; + } + } + } + + return pipe_cnt; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h index 79466d4f41851..d32c5bb99f4c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h @@ -33,5 +33,8 @@ #define DCN3_14_CRB_SEGMENT_SIZE_KB 64 void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); +int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate); #endif From b9dabb20eeed118916ba8d1312ee7a3f753f0e09 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 19 Jul 2022 17:02:56 -0400 Subject: [PATCH 29/79] drm/amd/display: Drop FPU flags from Makefile At this point, we isolate the FPU code 
associated with DCN314 under the DML folder. This commit drops the FPU flags from the Makefile. Reviewed-by: Nicholas Kazlauskas Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler --- .../gpu/drm/amd/display/dc/dcn314/Makefile | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile index e3b5a95e03b19..702c28c2560eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile @@ -13,31 +13,6 @@ DCN314 = dcn314_resource.o dcn314_hwseq.o dcn314_init.o \ dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -msse2 -endif -endif - AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN314) From d6b807c84bc9dbafad4d3cfb03f588571b9b40b9 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 15 Jul 2022 14:59:26 -0400 Subject: [PATCH 30/79] drm/amd/display: fix CAB allocation for multiple displays [Why & How] When multiple displays are used, the underlying framebuffers could be two separate framebuffers, or a single large framebuffer. Fix the calculation logic for CAB to account for large framebuffer. Current logic assumes that any FB that the plane points to are independent. 
When a single FB is used on the system, this does 2 times allocation. Add a check to prevent duplicate allocation by checking if the base addresses are the same, and then ensuring that the if we allocate using the pitch, whole of the other fbs will be accounted for in the first allocation. Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 554605ac3ba1a..872b30281c9a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -251,6 +251,7 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c uint32_t total_lines = 0; uint32_t lines_per_way = 0; uint32_t num_ways = 0; + uint32_t prev_addr_low = 0; for (i = 0; i < ctx->stream_count; i++) { stream = ctx->streams[i]; @@ -268,10 +269,20 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c plane = ctx->stream_status[i].plane_states[j]; // Calculate total surface size - surface_size = plane->plane_size.surface_pitch * + if (prev_addr_low != plane->address.grph.addr.u.low_part) { + /* if plane address are different from prev FB, then userspace allocated separate FBs*/ + surface_size += plane->plane_size.surface_pitch * plane->plane_size.surface_size.height * (plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4); + prev_addr_low = plane->address.grph.addr.u.low_part; + } else { + /* We have the same fb for all the planes. + * Xorg always creates one giant fb that holds all surfaces, + * so allocating it once is sufficient. 
+ * */ + continue; + } // Convert surface size + starting address to number of cache lines required // (alignment accounted for) cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size, From e83445412647360bbe07fb6c9ca158e5c229ee7a Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 20 Jul 2022 13:16:59 -0400 Subject: [PATCH 31/79] drm/amd/display: Add a variable to update FCLK latency [Description] Add a variable to update FCLK latency Reviewed-by: Martin Leung Acked-by: Tom Chung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 7 +++++++ drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 7 +++++++ 3 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index e697bad17573d..979b8bfa16962 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -660,6 +660,7 @@ struct dc_bounding_box_overrides { int percent_of_ideal_drambw; int dram_clock_change_latency_ns; int dummy_clock_change_latency_ns; + int fclk_clock_change_latency_ns; /* This forces a hard min on the DCFCLK we use * for DML. 
Unlike the debug option for forcing * DCFCLK, this override affects watermark calculations diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 02c6e06b08aaa..de27c5a31e71d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2082,6 +2082,13 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; } + if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000) + != dc->bb_overrides.fclk_clock_change_latency_ns + && dc->bb_overrides.fclk_clock_change_latency_ns) { + dcn3_2_soc.fclk_change_latency_us = + dc->bb_overrides.fclk_clock_change_latency_ns / 1000; + } + if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000) != dc->bb_overrides.dummy_clock_change_latency_ns && dc->bb_overrides.dummy_clock_change_latency_ns) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 84b4b00f29cbd..c87091683b5dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -498,6 +498,13 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; } + if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000) + != dc->bb_overrides.fclk_clock_change_latency_ns + && dc->bb_overrides.fclk_clock_change_latency_ns) { + dcn3_21_soc.fclk_change_latency_us = + dc->bb_overrides.fclk_clock_change_latency_ns / 1000; + } + if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000) != dc->bb_overrides.dummy_clock_change_latency_ns && dc->bb_overrides.dummy_clock_change_latency_ns) { From 170db21dfe55e8bb3fcbc645d8f6776cf58ca1f1 Mon Sep 17 00:00:00 2001 From: Daniel Miess Date: Tue, 19 Jul 2022 11:43:28 -0400 Subject: [PATCH 32/79] drm/amd/display: Fix TMDS 
4K@60Hz YCbCr420 corruption issue [Why] DIG_FIFO_OUTPUT_PIXEL_MODE not being set for dcn314 resulting in incorrect timing for YCbCr4:2:0 [How] Copy the implementation of set_pixels_per_cycle from dcn32 over to dcn314 Reviewed-by: Nicholas Kazlauskas Acked-by: Tom Chung Signed-off-by: Daniel Miess Tested-by: Daniel Wheeler --- .../drm/amd/display/dc/dcn314/dcn314_hwseq.c | 28 +++++++++++++++++++ .../drm/amd/display/dc/dcn314/dcn314_hwseq.h | 4 +++ .../drm/amd/display/dc/dcn314/dcn314_init.c | 2 ++ 3 files changed, 34 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 30f65a8416dfd..438b984750ee8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -378,3 +378,31 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig return odm_combine_factor; } + +void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx) +{ + uint32_t pix_per_cycle = 1; + uint32_t odm_combine_factor = 1; + + if (!pipe_ctx || !pipe_ctx->stream || !pipe_ctx->stream_res.stream_enc) + return; + + odm_combine_factor = get_odm_config(pipe_ctx, NULL); + if (optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing) || odm_combine_factor > 1 + || dcn314_is_dp_dig_pixel_rate_div_policy(pipe_ctx)) + pix_per_cycle = 2; + + if (pipe_ctx->stream_res.stream_enc->funcs->set_input_mode) + pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc, + pix_per_cycle); +} + +bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx) +{ + struct dc *dc = pipe_ctx->stream->ctx->dc; + + if (dc_is_dp_signal(pipe_ctx->stream->signal) && !is_dp_128b_132b_signal(pipe_ctx) && + dc->debug.enable_dp_dig_pixel_rate_div_policy) + return true; + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h index 
be0f5e4d48e13..d014580592aca 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h @@ -39,4 +39,8 @@ void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable); unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); +void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx); + +bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx); + #endif /* __DC_HWSS_DCN314_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c index 9dc4a8d182520..fc2f93edf34ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c @@ -147,6 +147,8 @@ static const struct hwseq_private_funcs dcn314_private_funcs = { .set_shaper_3dlut = dcn20_set_shaper_3dlut, .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, .calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values, + .set_pixels_per_cycle = dcn314_set_pixels_per_cycle, + .is_dp_dig_pixel_rate_div_policy = dcn314_is_dp_dig_pixel_rate_div_policy, }; void dcn314_hw_sequencer_construct(struct dc *dc) From 9130c7952489bdf48e1ca60879a9fa176dc3e351 Mon Sep 17 00:00:00 2001 From: Chiawen Huang Date: Thu, 21 Jul 2022 21:57:05 +0800 Subject: [PATCH 33/79] drm/amd/display: Device flash garbage before get in OS [Why] Enabling stream with tg lock makes config settings pending causing the garbage until tg unlock. [How] Keep the original lock mechanism The driver doesn't lock tg if plane_state is null. 
Reviewed-by: Anthony Koo Acked-by: Tom Chung Signed-off-by: Chiawen Huang Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index df0c36c630216..f6fd2073951d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -112,6 +112,7 @@ void dcn10_lock_all_pipes(struct dc *dc, */ if (pipe_ctx->top_pipe || !pipe_ctx->stream || + !pipe_ctx->plane_state || !tg->funcs->is_tg_enabled(tg)) continue; From 27e408e4ae79ae31b437ea2e218aaa876cfaabb2 Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Thu, 21 Jul 2022 15:33:00 -0400 Subject: [PATCH 34/79] drm/amd/display: Avoid MPC infinite loop [Why] In some cases MPC tree bottom pipe ends up point to itself. This causes iterating from top to bottom to hang the system in an infinite loop. [How] When looping to next MPC bottom pipe, check that the pointer is not same as current to avoid infinite loop. 
Reviewed-by: Jun Lei Acked-by: Tom Chung Signed-off-by: Josip Pavic Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 769974375b4b3..8e9384094f6d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -131,6 +131,12 @@ struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) while (tmp_mpcc != NULL) { if (tmp_mpcc->dpp_id == dpp_id) return tmp_mpcc; + + /* avoid circular linked list */ + ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot); + if (tmp_mpcc == tmp_mpcc->mpcc_bot) + break; + tmp_mpcc = tmp_mpcc->mpcc_bot; } return NULL; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index 3d307dd58e9af..116f67a0b989d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -531,6 +531,12 @@ static struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) while (tmp_mpcc != NULL) { if (tmp_mpcc->dpp_id == 0xf || tmp_mpcc->dpp_id == dpp_id) return tmp_mpcc; + + /* avoid circular linked list */ + ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot); + if (tmp_mpcc == tmp_mpcc->mpcc_bot) + break; + tmp_mpcc = tmp_mpcc->mpcc_bot; } return NULL; From 9210e25c38e93ed3387e4e708e6fd5e489efe6f2 Mon Sep 17 00:00:00 2001 From: Leo Ma Date: Fri, 22 Jul 2022 13:42:58 -0400 Subject: [PATCH 35/79] drm/amd/display: Fix HDMI VSIF V3 incorrect issue [Why] Reported from customer the checksum in AMD VSIF V3 is incorrect and causing blank screen issue. [How] Fix the packet length issue on AMD HDMI VSIF V3. 
Reviewed-by: Anthony Koo Acked-by: Tom Chung Signed-off-by: Leo Ma Tested-by: Daniel Wheeler --- .../drm/amd/display/modules/freesync/freesync.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index da09ba7589f73..0f39ab9dc5b41 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -613,10 +613,6 @@ static void build_vrr_infopacket_data_v1(const struct mod_vrr_params *vrr, * Note: We should never go above the field rate of the mode timing set. */ infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000); - - /* FreeSync HDR */ - infopacket->sb[9] = 0; - infopacket->sb[10] = 0; } static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr, @@ -684,10 +680,6 @@ static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr, /* PB16 : Reserved bits 7:1, FixedRate bit 0 */ infopacket->sb[16] = (vrr->state == VRR_STATE_ACTIVE_FIXED) ? 
1 : 0; - - //FreeSync HDR - infopacket->sb[9] = 0; - infopacket->sb[10] = 0; } static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf, @@ -772,8 +764,7 @@ static void build_vrr_infopacket_header_v2(enum signal_type signal, /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length = 0x09] */ infopacket->hb2 = 0x09; - *payload_size = 0x0A; - + *payload_size = 0x09; } else if (dc_is_dp_signal(signal)) { /* HEADER */ @@ -822,9 +813,9 @@ static void build_vrr_infopacket_header_v3(enum signal_type signal, infopacket->hb1 = version; /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length] */ - *payload_size = 0x10; - infopacket->hb2 = *payload_size - 1; //-1 for checksum + infopacket->hb2 = 0x10; + *payload_size = 0x10; } else if (dc_is_dp_signal(signal)) { /* HEADER */ From 59e8e1bd5c0f26db417f46b9cbaf530c94cdccbc Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Sun, 16 May 2021 11:29:26 -0400 Subject: [PATCH 36/79] drm/amd/display: Add documentation to some of the cursor struct Reviewed-by: Harry Wentland Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 62 +++++++++++++++++--- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 982d96e97a3dc..44999a2e4cbca 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -417,19 +417,43 @@ enum dc_scan_direction { SCAN_DIRECTION_VERTICAL = 2, /* 90, 270 rotation */ }; +/** + * struct dc_cursor_position: Hardware cursor data. + * + * This struct keeps the action information related to the cursor that will be + * sent and received from our DC core. + */ struct dc_cursor_position { + /** + * @x: It represents the top left abscissa coordinate of the cursor. + */ uint32_t x; + + /** + * @y: It is the top ordinate of the cursor coordinate. 
+ */ uint32_t y; + /** + * @x_hotspot: Define the abscissa point where mouse click happens. + */ uint32_t x_hotspot; + + /** + * @y_hotspot: Define the ordinate point where mouse click happens. + */ uint32_t y_hotspot; - /* - * This parameter indicates whether HW cursor should be enabled + /** + * @enable: This parameter indicates whether hardware cursor should be + * enabled. */ bool enable; - /* Translate cursor x/y by the source rectangle for each plane. */ + /** + * @translate_by_source: Translate cursor x/y by the source rectangle + * for each plane. + */ bool translate_by_source; }; @@ -494,7 +518,9 @@ struct dc_gamma { /* Used by both ipp amd opp functions*/ /* TODO: to be consolidated with enum color_space */ -/* +/** + * enum dc_cursor_color_format - DC cursor programming mode + * * This enum is for programming CURSOR_MODE register field. What this register * should be programmed to depends on OS requested cursor shape flags and what * we stored in the cursor surface. @@ -530,17 +556,39 @@ union dc_cursor_attribute_flags { }; struct dc_cursor_attributes { + /** + * @address: This field represents the framebuffer address associated + * with the cursor. It is important to highlight that this address is + * divided into high and low parts. + */ PHYSICAL_ADDRESS_LOC address; + + /** + * @pitch: Cursor line stride. + */ uint32_t pitch; - /* Width and height should correspond to cursor surface width x heigh */ + /** + * @width: Width should correspond to cursor surface width. + */ uint32_t width; + /** + * @height: Height should correspond to cursor surface height. + */ uint32_t height; + /** + * @color_format: DC cursor programming mode. + */ enum dc_cursor_color_format color_format; - uint32_t sdr_white_level; // for boosting (SDR) cursor in HDR mode + /** + * @sdr_white_level: Boosting (SDR) cursor in HDR mode. 
+ */ + uint32_t sdr_white_level; - /* In case we support HW Cursor rotation in the future */ + /** + * @rotation_angle: In case we support HW Cursor rotation in the future + */ enum dc_rotation_angle rotation_angle; union dc_cursor_attribute_flags attribute_flags; From a48bbd75f956662291a51c3972d2f9ff8b47fdeb Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 27 Aug 2021 10:34:37 -0400 Subject: [PATCH 37/79] drm/amd/display: Add basic kernel doc to CRC code under DC Reviewed-by: Harry Wentland Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/core/dc.c | 17 +++++++------ .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 25 ++++++++++++++++--- .../gpu/drm/amd/display/dc/inc/core_types.h | 4 +++ .../amd/display/dc/inc/hw/timing_generator.h | 14 ++++++++--- 4 files changed, 46 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 4e778f5140b81..39736eb4cfe3c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -642,14 +642,17 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, /** * dc_stream_get_crc() - Get CRC values for the given stream. - * @dc: DC object + * + * @dc: DC object. * @stream: The DC stream state of the stream to get CRCs from. - * @r_cr: CRC value for the first of the 3 channels stored here. - * @g_y: CRC value for the second of the 3 channels stored here. - * @b_cb: CRC value for the third of the 3 channels stored here. + * @r_cr: CRC value for the red component. + * @g_y: CRC value for the green component. + * @b_cb: CRC value for the blue component. * * dc_stream_configure_crc needs to be called beforehand to enable CRCs. - * Return false if stream is not found, or if CRCs are not enabled. + * + * Return: + * false if stream is not found, or if CRCs are not enabled. 
*/ bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) @@ -4311,8 +4314,8 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc) /* ***************************************************************************** * Function: dc_is_dmub_outbox_supported - - * - * @brief + * + * @brief * Checks whether DMUB FW supports outbox notifications, if supported * DM should register outbox interrupt prior to actually enabling interrupts * via dc_enable_dmub_outbox diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 968b089d86e92..754b6a58d9074 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -1493,8 +1493,23 @@ bool optc1_configure_crc(struct timing_generator *optc, return true; } +/** + * optc1_get_crc - Capture CRC result per component + * + * @optc: timing_generator instance. + * @r_cr: 16-bit primary CRC signature for red data. + * @g_y: 16-bit primary CRC signature for green data. + * @b_cb: 16-bit primary CRC signature for blue data. + * + * This function reads the CRC signature from the OPTC registers. Notice that + * we have three registers to keep the CRC result per color component (RGB). + * + * Returns: + * If CRC is disabled, return false; otherwise, return true, and the CRC + * results in the parameters. 
+ */ bool optc1_get_crc(struct timing_generator *optc, - uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) + uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) { uint32_t field = 0; struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -1505,12 +1520,14 @@ bool optc1_get_crc(struct timing_generator *optc, if (!field) return false; + /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green component */ REG_GET_2(OTG_CRC0_DATA_RG, - CRC0_R_CR, r_cr, - CRC0_G_Y, g_y); + CRC0_R_CR, r_cr, + CRC0_G_Y, g_y); + /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */ REG_GET(OTG_CRC0_DATA_B, - CRC0_B_CB, b_cb); + CRC0_B_CB, b_cb); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 4cdcb9751a9cb..598e36108dc8b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -402,6 +402,10 @@ struct pipe_ctx { struct dc_stream_state *stream; struct plane_resource plane_res; + + /** + * @stream_res: Reference to DCN resource components such OPP and DSC. + */ struct stream_resource stream_res; struct link_resource link_res; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 7f97718e68493..7e43b4403b3e3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -139,7 +139,13 @@ struct crc_params { bool enable; }; +/** + * struct timing_generator - Entry point to Output Timing Generator feature. + */ struct timing_generator { + /** + * @funcs: Timing generator control functions + */ const struct timing_generator_funcs *funcs; struct dc_bios *bp; struct dc_context *ctx; @@ -150,7 +156,9 @@ struct dc_crtc_timing; struct drr_params; - +/** + * struct timing_generator_funcs - Control timing generator on a given device. 
+ */ struct timing_generator_funcs { bool (*validate_timing)(struct timing_generator *tg, const struct dc_crtc_timing *timing); @@ -275,8 +283,8 @@ struct timing_generator_funcs { const struct crc_params *params); /** - * Get CRCs for the given timing generator. Return false if CRCs are - * not enabled (via configure_crc). + * @get_crc: Get CRCs for the given timing generator. Return false if + * CRCs are not enabled (via configure_crc). */ bool (*get_crc)(struct timing_generator *tg, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); From 8f3544a81c183291b714e7756ebef2e9173cbd99 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 21 Feb 2022 15:22:50 -0500 Subject: [PATCH 38/79] drm/amd/display: Add some extra kernel doc to amdgpu_dm Reviewed-by: Harry Wentland Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 +++++-- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 46 +++++++++++++++++-- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0b754d909fb98..3ee96e36423c0 100755 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10226,6 +10226,7 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm /** * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM. + * * @dev: The DRM device * @state: The atomic state to commit * @@ -10945,8 +10946,18 @@ static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector, return valid_vsdb_found ? i : -ENODEV; } +/** + * amdgpu_dm_update_freesync_caps - Update Freesync capabilities + * + * @connector: Connector to query. + * + * Amdgpu supports Freesync in DP and HDMI displays, and it is required to keep + * track of some of the display information in the internal data struct used by + * amdgpu_dm. 
This function checks which type of connector we need to set the + * FreeSync parameters. + */ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, - struct edid *edid) + struct edid *edid) { int i = 0; struct detailed_timing *timing; @@ -10959,8 +10970,8 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, struct drm_device *dev = connector->dev; struct amdgpu_device *adev = drm_to_adev(dev); - bool freesync_capable = false; struct amdgpu_hdmi_vsdb_info vsdb_info = {0}; + bool freesync_capable = false; if (!connector->state) { DRM_ERROR("%s - Connector has no state", __func__); @@ -10991,7 +11002,6 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (!adev->dm.freesync_module) goto update; - if (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || sink->sink_signal == SIGNAL_TYPE_EDP) { bool edid_check_required = false; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index ec12ee7a1f1a8..74fa9e7cde9c1 100755 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -633,6 +633,10 @@ struct amdgpu_dm_connector { * The 'current' sink is in dc_link->sink. */ struct dc_sink *dc_sink; struct dc_link *dc_link; + + /** + * @dc_em_sink: Reference to the emulated (virtual) sink. + */ struct dc_sink *dc_em_sink; /* DM only */ @@ -645,7 +649,16 @@ struct amdgpu_dm_connector { struct amdgpu_i2c_adapter *i2c; /* Monitor range limits */ - int min_vfreq ; + /** + * @min_vfreq: Minimal frequency supported by the display in Hz. This + * value is set to zero when there is no FreeSync support. + */ + int min_vfreq; + + /** + * @max_vfreq: Maximum frequency supported by the display in Hz. This + * value is set to zero when there is no FreeSync support. 
+ */ int max_vfreq ; int pixel_clock_mhz; @@ -757,11 +770,34 @@ struct dm_connector_state { #endif }; +/** + * struct amdgpu_hdmi_vsdb_info - Keep track of the VSDB info + * + * AMDGPU supports FreeSync over HDMI by using the VSDB section, and this + * struct is useful to keep track of the display-specific information about + * FreeSync. + */ struct amdgpu_hdmi_vsdb_info { - unsigned int amd_vsdb_version; /* VSDB version, should be used to determine which VSIF to send */ - bool freesync_supported; /* FreeSync Supported */ - unsigned int min_refresh_rate_hz; /* FreeSync Minimum Refresh Rate in Hz */ - unsigned int max_refresh_rate_hz; /* FreeSync Maximum Refresh Rate in Hz */ + /** + * @amd_vsdb_version: Vendor Specific Data Block Version, should be + * used to determine which Vendor Specific InfoFrame (VSIF) to send. + */ + unsigned int amd_vsdb_version; + + /** + * @freesync_supported: FreeSync Supported. + */ + bool freesync_supported; + + /** + * @min_refresh_rate_hz: FreeSync Minimum Refresh Rate in Hz. + */ + unsigned int min_refresh_rate_hz; + + /** + * @max_refresh_rate_hz: FreeSync Maximum Refresh Rate in Hz + */ + unsigned int max_refresh_rate_hz; }; From 3059a12b2c4497095013d8087c211146909c12af Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 1 Apr 2022 14:46:07 -0400 Subject: [PATCH 39/79] drm/amd/display: Document pipe split policy In the DCN code, we can find some references to three different pipe split policies but no clear description. This commit adds some kernel-doc that explains more about these options and a little bit of their impact. 
Reviewed-by: Harry Wentland Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dc.h | 35 +++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 979b8bfa16962..b0e3f9e0888cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -435,9 +435,31 @@ enum dcc_option { DCC_HALF_REQ_DISALBE = 2, }; +/** + * enum pipe_split_policy - Pipe split strategy supported by DCN + * + * This enum is used to define the pipe split policy supported by DCN. By + * default, DC favors MPC_SPLIT_DYNAMIC. + */ enum pipe_split_policy { + /** + * @MPC_SPLIT_DYNAMIC: DC will automatically decide how to split the + * pipe in order to bring the best trade-off between performance and + * power consumption. This is the recommended option. + */ MPC_SPLIT_DYNAMIC = 0, + + /** + * @MPC_SPLIT_AVOID: Avoid pipe split, which means that DC will not + * try any sort of split optimization. + */ MPC_SPLIT_AVOID = 1, + + /** + * @MPC_SPLIT_AVOID_MULT_DISP: With this option, DC will only try to optimize + * the pipe utilization when using a single display; if the user + * connects to a second display, DC will avoid pipe split. + */ MPC_SPLIT_AVOID_MULT_DISP = 2, }; @@ -672,6 +694,14 @@ struct dc_state; struct resource_pool; struct dce_hwseq; +/** + * struct dc_debug_options - DC debug struct + * + * This struct provides a simple mechanism for developers to change some + * configurations, enable/disable features, and activate extra debug options. + * This can be very handy to narrow down whether some specific feature is + * causing an issue or not. 
+ */ struct dc_debug_options { bool native422_support; bool disable_dsc; @@ -691,6 +721,11 @@ struct dc_debug_options { bool disable_stutter; bool use_max_lb; enum dcc_option disable_dcc; + + /** + * @pipe_split_policy: Define which pipe split policy is used by the + * display core. + */ enum pipe_split_policy pipe_split_policy; bool force_single_disp_pipe_split; bool voltage_align_fclk; From 61358c8047ff860c1f5031028aaa73020733331f Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 11 Mar 2022 14:26:58 -0500 Subject: [PATCH 40/79] drm/amd/display: Expand documentation for timing The timing programming inside DCN is far from trivial, it has multiple parameters associated with that, and the lack of documentation does not help comprehend this already complicated topic. This commit tries to improve this situation by expanding the documentation of dc_crtc_timing and the VTG program function. Reviewed-by: Harry Wentland Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dc_hw_types.h | 85 +++++++++++++++++++ .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 14 +++ 2 files changed, 99 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 44999a2e4cbca..848db8676adfd 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -813,22 +813,107 @@ struct dc_dsc_config { uint32_t mst_pbn; /* pbn of display on dsc mst hub */ }; +/** + * struct dc_crtc_timing - Timing parameters used to configure DCN blocks + * + * DCN provides multiple signals and parameters that can be used to adjust + * timing parameters; this struct aggregates several of these values for easy + * access. In this struct, fields prefixed with h_* are related to horizontal + * timing, and v_* to vertical timing. 
Keep in mind that when we talk about + * vertical timings, the values, in general, are described in the number of + * lines; on the other hand, the horizontal values are in pixels. + */ struct dc_crtc_timing { + /** + * @h_total: The total number of pixels from the rising edge of the previous HSync + * until the rising edge of the current HSync. + */ uint32_t h_total; + + /** + * @h_border_left: The black pixels related to the left border + */ uint32_t h_border_left; + + /** + * @h_addressable: It is the range of pixels displayed horizontally. + * For example, if the display resolution is 3840x2160, the horizontal + * addressable area is 3840. + */ uint32_t h_addressable; + + /** + * @h_border_right: The black pixels related to the right border + */ uint32_t h_border_right; + + /** + * @h_front_porch: Period (in pixels) between HBlank start and the + * rising edge of HSync. + */ uint32_t h_front_porch; + + /** + * @h_sync_width: HSync duration in pixels. + */ uint32_t h_sync_width; + /** + * @v_total: It is the total number of lines from the rising edge of + * the previous VSync until the rising edge of the current VSync. + * + * |--------------------------| + * +-+ V_TOTAL +-+ + * | | | | + * VSync ---+ +--------- // -----------+ +--- + */ uint32_t v_total; + + /** + * @v_border_top: The black border on the top. + */ uint32_t v_border_top; + + /** + * @v_addressable: It is the range of the scanout at which the + * framebuffer is displayed. For example, if the display resolution is + * 3840x2160, the addressable area is 2160 lines, or if the resolution + * is 1920x1080, the addressable area is 1080 lines. + */ uint32_t v_addressable; + + /** + * @v_border_bottom: The black border on the bottom. + */ uint32_t v_border_bottom; + + /** + * @v_front_porch: Period (in lines) between VBlank start and rising + * edge of VSync. + * +-+ + * VSync | | + * ----------+ +--------... + * +------------------... 
+ * VBlank | + * --+ + * |-------| + * v_front_porch + */ uint32_t v_front_porch; + + /** + * @v_sync_width: VSync signal width in lines. + */ uint32_t v_sync_width; + /** + * @pix_clk_100hz: Pipe pixel precision + * + * This field is used to communicate pixel clocks with 100 Hz accuracy + * from dc_crtc_timing to BIOS command table. + */ uint32_t pix_clk_100hz; + uint32_t min_refresh_in_uhz; uint32_t vic; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 754b6a58d9074..ac8bc335bf1a9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -312,6 +312,20 @@ void optc1_program_timing( } } +/** + * optc1_set_vtg_params - Set Vertical Timing Generator (VTG) parameters + * + * @optc: timing_generator struct used to extract the optc parameters + * @dc_crtc_timing: Timing parameters configured + * @program_fp2: Boolean value indicating if FP2 will be programmed or not + * + * OTG is responsible for generating the global sync signals, including + * vertical timing information for each HUBP in the dcfclk domain. Each VTG is + * associated with one OTG that provides HUBP with vertical timing information + * (i.e., there is 1:1 correspondence between OTG and VTG). This function is + * responsible for setting the OTG parameters to the VTG during the pipe + * programming. 
+ */ void optc1_set_vtg_params(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2) { From 65ab170f4522cda1407abeac665a8ae88ba87299 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Tue, 10 May 2022 10:07:33 -0400 Subject: [PATCH 41/79] drm/amd/display: Document some of the DML structs Reviewed-by: Harry Wentland Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler --- .../amd/display/dc/dml/display_mode_structs.h | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index e8b094006d95d..8538588e77545 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -26,6 +26,16 @@ #include "dc_features.h" #include "display_mode_enums.h" +/** + * DOC: overview + * + * Most of the DML code is automatically generated and tested via hardware + * description language. Usually, we use the reference _vcs_dpi in the code + * where VCS means "Verilog Compiled Simulator" and DPI stands for "Direct + * Programming Interface". In other words, those structs can be used to + * interface Verilog with other languages such as C. + */ + #ifndef __DISPLAY_MODE_STRUCTS_H__ #define __DISPLAY_MODE_STRUCTS_H__ @@ -159,6 +169,14 @@ struct _vcs_dpi_voltage_scaling_st { double dtbclk_mhz; }; +/** + * struct _vcs_dpi_soc_bounding_box_st - SOC definitions + * + * This struct maintains the SOC Bounding Box information for the ASIC; it + * defines things such as clock, voltage, performance, etc. Usually, we load + * these values from VBIOS; if something goes wrong, we use some hard-coded + * values, which will enable the ASIC to light up with limitations. 
+ */ struct _vcs_dpi_soc_bounding_box_st { struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; /* @@ -166,6 +184,11 @@ struct _vcs_dpi_soc_bounding_box_st { * clock table. Do not use outside of *update_bw_boudning_box functions. */ struct _vcs_dpi_voltage_scaling_st _clock_tmp[DC__VOLTAGE_STATES]; + + /** + * @num_states: It represents the total number of Display Power Management + * (DPM) states supported by the specific ASIC. + */ unsigned int num_states; double sr_exit_time_us; double sr_enter_plus_exit_time_us; @@ -231,6 +254,14 @@ struct _vcs_dpi_soc_bounding_box_st { enum self_refresh_affinity allow_dram_self_refresh_or_dram_clock_change_in_vblank; }; +/** + * struct _vcs_dpi_ip_params_st - IP configuration for DCN blocks + * + * In this struct you can find the DCN configuration associated to the specific + * ASIC. For example, here we can save how many DPPs the ASIC is using and it + * is available. + * + */ struct _vcs_dpi_ip_params_st { bool use_min_dcfclk; bool clamp_min_dcfclk; @@ -283,6 +314,9 @@ struct _vcs_dpi_ip_params_st { unsigned int writeback_line_buffer_chroma_buffer_size; unsigned int max_page_table_levels; + /** + * @max_num_dpp: Maximum number of DPP supported in the target ASIC. + */ unsigned int max_num_dpp; unsigned int max_num_otg; unsigned int cursor_chunk_size; From 087b781ca940d39e06d80f25609a572d6f4e3a47 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Wed, 20 Jul 2022 17:54:05 -0400 Subject: [PATCH 42/79] drm/amd/display: Allow alternate prefetch modes in DML for DCN32 [Why] Driver is restricting voltage levels if system cannot switch in vblank. [How] Change allow_for_pstate_or_stutter_in_vblank_final from dm_prefetch_support_uclk_fclk_and_stutter to dm_prefetch_support_uclk_fclk_and_stutter_if_possible. Add support for a new registry property, DalDMLDisallowAlternatePrefetchModes, for easier debugging. 
Reviewed-by: Alvin Lee Reviewed-by: Jun Lei Acked-by: Tom Chung Signed-off-by: David Galiffi Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 30 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index b0e3f9e0888cf..721a04fe85d95 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -840,6 +840,7 @@ struct dc_debug_options { uint32_t mst_start_top_delay; uint8_t psr_power_use_phy_fsm; enum dml_hostvm_override_opts dml_hostvm_override; + bool dml_disallow_alternate_prefetch_modes; bool use_legacy_soc_bb_mechanism; bool exit_idle_opt_for_cursor_updates; bool enable_single_display_2to1_odm_policy; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index de27c5a31e71d..8d14be6a2ce5d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -983,9 +983,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, * DML favors voltage over p-state, but we're more interested in * supporting p-state over voltage. We can't support p-state in * prefetch mode > 0 so try capping the prefetch mode to start. + * Override present for testing. 
*/ - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + if (dc->debug.dml_disallow_alternate_prefetch_modes) + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = dm_prefetch_support_uclk_fclk_and_stutter; + else + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_uclk_fclk_and_stutter_if_possible; + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); /* This may adjust vlevel and maxMpcComb */ if (*vlevel < context->bw_ctx.dml.soc.num_states) @@ -1014,7 +1020,9 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, * will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched * enough to support MCLK switching. */ - if (*vlevel == context->bw_ctx.dml.soc.num_states) { + if (*vlevel == context->bw_ctx.dml.soc.num_states && + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final == + dm_prefetch_support_uclk_fclk_and_stutter) { context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = dm_prefetch_support_stutter; /* There are params (such as FabricClock) that need to be recalculated @@ -1335,7 +1343,8 @@ bool dcn32_internal_validate_bw( int split[MAX_PIPES] = { 0 }; bool merge[MAX_PIPES] = { false }; bool newly_split[MAX_PIPES] = { false }; - int pipe_cnt, i, pipe_idx, vlevel; + int pipe_cnt, i, pipe_idx; + int vlevel = context->bw_ctx.dml.soc.num_states; struct vba_vars_st *vba = &context->bw_ctx.dml.vba; ASSERT(pipes); @@ -1362,17 +1371,22 @@ bool dcn32_internal_validate_bw( DC_FP_END(); } - if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + if (fast_validate || + dc->debug.dml_disallow_alternate_prefetch_modes && + (vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported)) { /* - * If mode is unsupported or there's 
still no p-state support then - * fall back to favoring voltage. + * If dml_disallow_alternate_prefetch_modes is false, then we have already + * tried alternate prefetch modes during full validation. + * + * If mode is unsupported or there is no p-state support, then + * fall back to favouring voltage. * * We don't actually support prefetch mode 2, so require that we * at least support prefetch mode 1. */ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_stutter; + dm_prefetch_support_stutter; vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); if (vlevel < context->bw_ctx.dml.soc.num_states) { From 27438bed2c90184e93169c11a06db5165fc757bc Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Fri, 22 Jul 2022 16:03:13 -0400 Subject: [PATCH 43/79] drm/amd/display: Fix Compile-time Warning Fixed bracketing around condition statement. Reviewed-by: Alvin Lee Reviewed-by: Jun Lei Acked-by: Tom Chung Signed-off-by: David Galiffi Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 8d14be6a2ce5d..ae45f52b9fe20 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1372,9 +1372,9 @@ bool dcn32_internal_validate_bw( } if (fast_validate || - dc->debug.dml_disallow_alternate_prefetch_modes && + (dc->debug.dml_disallow_alternate_prefetch_modes && (vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported)) { + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) { /* * If dml_disallow_alternate_prefetch_modes is false, then we have already * tried alternate prefetch modes during full validation. 
From d1b681992faa6d3d868b7bfdcd390b541556fe64 Mon Sep 17 00:00:00 2001 From: Duncan Ma Date: Mon, 25 Jul 2022 15:26:39 -0400 Subject: [PATCH 44/79] drm/amd/display: Fix VPG instancing for dcn314 HPO [Why] An issue during VPG indexing offset generation causing to use the incorrect VPG. HW team placed VPG instances 5 at end of list, making it VPG 9 in register headers. [How] Correct VPG instance for HPO encoders. Reviewed-by: Charlene Liu Acked-by: Tom Chung Signed-off-by: Duncan Ma Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 13371331f888a..ecd2e4446ea1f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1256,7 +1256,7 @@ static struct stream_encoder *dcn314_stream_encoder_create( int afmt_inst; /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGF) { + if (eng_id < ENGINE_ID_DIGF) { vpg_inst = eng_id; afmt_inst = eng_id; } else @@ -1301,7 +1301,8 @@ static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( * VPG[8] -> HPO_DP[2] * VPG[9] -> HPO_DP[3] */ - vpg_inst = hpo_dp_inst + 6; + //Uses offset index 5-8, but actually maps to vpg_inst 6-9 + vpg_inst = hpo_dp_inst + 5; /* Mapping of APG register blocks to HPO DP block instance: * APG[0] -> HPO_DP[0] From cf88667a8d31c715e49e9e713a04651d0be3f8d5 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 26 Jul 2022 13:13:27 -0400 Subject: [PATCH 45/79] drm/amd/display: Check correct bounds for stream encoder instances for DCN303 [Why & How] eng_id for DCN303 cannot be more than 1, since we have only two instances of stream encoders. Check the correct boundary condition for engine ID for DCN303 prevent the potential out of bounds access. 
Fixes: cd6d421e3d1a ("drm/amd/display: Initial DC support for Beige Goby") Reported-by: Dan Carpenter Cc: Reviewed-by: Chris Park Reviewed-by: Rodrigo Siqueira Acked-by: Tom Chung Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index f06b24dac4fc1..372b79cf35a15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -374,7 +374,7 @@ static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id int afmt_inst; /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGE) { + if (eng_id <= ENGINE_ID_DIGB) { vpg_inst = eng_id; afmt_inst = eng_id; } else From 5f8fabf9f9c2364d6148dcb0cc1cdc054e1109af Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Tue, 26 Jul 2022 14:12:29 -0400 Subject: [PATCH 46/79] drm/amd/display: Enable SubVP by default on DCN32 & DCN321 [Why&How] Set the debug options to enable SubVP feature Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 60f130a84eebb..f3c8023d41b73 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -869,7 +869,7 @@ static const struct dc_debug_options debug_defaults_drv = { } }, .use_max_lb = true, - .force_disable_subvp = true, + .force_disable_subvp = false, .exit_idle_opt_for_cursor_updates = true, 
.enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 7af19db015143..4406fe9c9f2a8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -868,7 +868,7 @@ static const struct dc_debug_options debug_defaults_drv = { } }, .use_max_lb = true, - .force_disable_subvp = true, + .force_disable_subvp = false, .exit_idle_opt_for_cursor_updates = true, .enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, From ae3d47444d099bbf117bd53e20ac7783f9223305 Mon Sep 17 00:00:00 2001 From: Duncan Ma Date: Wed, 27 Jul 2022 15:44:43 -0400 Subject: [PATCH 47/79] drm/amd/display: Correct DTBCLK for dcn314 [Why] DTBCLK clocks reset after clocks are initialized and bounding box values are also incorrect. [How] Use dcn31 init clock function programming sequence and correct bounding box values for dcn314 Reviewed-by: Nicholas Kazlauskas Reviewed-by: Jun Lei Acked-by: Tom Chung Signed-off-by: Duncan Ma Tested-by: Daniel Wheeler --- .../amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 12 +----------- .../amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h | 2 +- .../gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c | 10 +++++----- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index ee99974b3b62b..7af19823a29db 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -307,16 +307,6 @@ static void dcn314_enable_pme_wa(struct clk_mgr *clk_mgr_base) dcn314_smu_enable_pme_wa(clk_mgr); } -void dcn314_init_clocks(struct clk_mgr *clk_mgr) -{ - memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); - 
// Assumption is that boot state always supports pstate - clk_mgr->clks.p_state_change_support = true; - clk_mgr->clks.prev_p_state_change_support = true; - clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; - clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; -} - bool dcn314_are_clock_states_equal(struct dc_clocks *a, struct dc_clocks *b) { @@ -641,7 +631,7 @@ static struct clk_mgr_funcs dcn314_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, .update_clocks = dcn314_update_clocks, - .init_clocks = dcn314_init_clocks, + .init_clocks = dcn31_init_clocks, .enable_pme_wa = dcn314_enable_pme_wa, .are_clock_states_equal = dcn314_are_clock_states_equal, .notify_wm_ranges = dcn314_notify_wm_ranges diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h index c695a4498c50f..171f84340eb2f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h @@ -42,7 +42,7 @@ struct clk_mgr_dcn314 { bool dcn314_are_clock_states_equal(struct dc_clocks *a, struct dc_clocks *b); -void dcn314_init_clocks(struct clk_mgr *clk_mgr); + void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index d52a16d280e98..c80307a6af1bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -106,7 +106,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { .phyclk_mhz = 600.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 1, @@ -115,7 +115,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 
209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 2, @@ -124,7 +124,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 3, @@ -133,7 +133,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, { .state = 4, @@ -142,7 +142,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, + .dtbclk_mhz = 600.0, }, }, .num_states = 5, From e47b295e85e2dbe17a1da9133063fc517edceaf4 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 26 Jul 2022 19:00:23 -0400 Subject: [PATCH 48/79] drm/amd/display: Revert "attempt to fix the logic in commit_planes_for_stream()" [Description] Reverts commit "attempt to fix the logic in commit_planes_for_stream()" since it caused a regression. 
Reviewed-by: Martin Leung Acked-by: Tom Chung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/core/dc.c | 27 +++++++++++------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 39736eb4cfe3c..679edd473e1fe 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3245,7 +3245,7 @@ static void commit_planes_for_stream(struct dc *dc, odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU; } - if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) { + if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) if (top_pipe_to_program && top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { if (should_use_dmub_lock(stream->link)) { @@ -3263,7 +3263,6 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable( top_pipe_to_program->stream_res.tg); } - } if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { if (dc->hwss.subvp_pipe_control_lock) @@ -3482,7 +3481,7 @@ static void commit_planes_for_stream(struct dc *dc, dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false); } - if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) { + if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { top_pipe_to_program->stream_res.tg->funcs->wait_for_state( top_pipe_to_program->stream_res.tg, @@ -3509,21 +3508,19 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_disable( top_pipe_to_program->stream_res.tg); } - } - if (update_type != UPDATE_TYPE_FAST) { + if (update_type != UPDATE_TYPE_FAST) dc->hwss.post_unlock_program_front_end(dc, context); - /* Since phantom pipe 
programming is moved to post_unlock_program_front_end, - * move the SubVP lock to after the phantom pipes have been setup - */ - if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { - if (dc->hwss.subvp_pipe_control_lock) - dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use); - } else { - if (dc->hwss.subvp_pipe_control_lock) - dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use); - } + /* Since phantom pipe programming is moved to post_unlock_program_front_end, + * move the SubVP lock to after the phantom pipes have been setup + */ + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { + if (dc->hwss.subvp_pipe_control_lock) + dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use); + } else { + if (dc->hwss.subvp_pipe_control_lock) + dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use); } // Fire manual trigger only when bottom plane is flipped From d4527b100667a3495eedc6abeb9f4f6c9b8c6e3b Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 28 Jul 2022 09:51:05 -0400 Subject: [PATCH 49/79] drm/amd/display: For stereo keep "FLIP_ANY_FRAME" [Description] Observed in stereomode that programming FLIP_LEFT_EYE can cause hangs. 
Keep FLIP_ANY_FRAME in stereo mode so the surface flip can take place before left or right eye Reviewed-by: Martin Leung Acked-by: Tom Chung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index 6a4dcafb9bba5..dc3e8df706b34 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -86,7 +86,7 @@ bool hubp3_program_surface_flip_and_addr( VMID, address->vmid); if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) { - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1); + REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0); REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1); } else { From 2abf2573b1c69b7741294469fc36683fece7fdc9 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Thu, 4 Aug 2022 09:54:57 -0500 Subject: [PATCH 50/79] drm/amdgpu: Enable translate_further to extend UTCL2 reach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable translate_further on Arcturus and Aldebaran server chips in order to increase the UTCL2 reach from 8 GiB to 64 GiB, which is more in line with the amount of framebuffer DRAM in the devices. 
Signed-off-by: Joseph Greathouse Acked-by: Felix Kuehling Acked-by: Kent Russell Acked-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4f4aaed3a0974..1fca5d7dbd246 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1619,12 +1619,15 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47); else amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 1): adev->num_vmhubs = 3; /* Keep the vm size same with Vega20 */ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; default: break; From 70965215dade9775d386527d4b49d494fbee3e0b Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 28 Jul 2022 10:02:25 -0400 Subject: [PATCH 51/79] drm/amd/display: Don't try to enter MALL SS if stereo3d [Description] MALL not supported with stereo3D according to DCN doc. 
Reviewed-by: Jun Lei Acked-by: Tom Chung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler --- .../drm/amd/display/dc/dcn32/dcn32_hwseq.c | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 872b30281c9a7..bb815654dbd22 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -332,7 +332,9 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) { union dmub_rb_cmd cmd; - uint8_t ways; + uint8_t ways, i, j; + bool stereo_in_use = false; + struct dc_plane_state *plane = NULL; if (!dc->ctx->dmub_srv) return false; @@ -361,7 +363,23 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) * and configure HUBP's to fetch from MALL */ ways = dcn32_calculate_cab_allocation(dc, dc->current_state); - if (ways <= dc->caps.cache_num_ways) { + + /* MALL not supported with Stereo3D. If any plane is using stereo, + * don't try to enter MALL. 
+ */ + for (i = 0; i < dc->current_state->stream_count; i++) { + for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) { + plane = dc->current_state->stream_status[i].plane_states[j]; + + if (plane->address.type == PLN_ADDR_TYPE_GRPH_STEREO) { + stereo_in_use = true; + break; + } + } + if (stereo_in_use) + break; + } + if (ways <= dc->caps.cache_num_ways && !stereo_in_use) { memset(&cmd, 0, sizeof(cmd)); cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS; cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB; @@ -695,9 +713,11 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context) if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { hubp->funcs->hubp_update_mall_sel(hubp, 1, false); } else { + // MALL not supported with Stereo3D hubp->funcs->hubp_update_mall_sel(hubp, num_ways <= dc->caps.cache_num_ways && - pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 2 : 0, + pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED && + pipe->plane_state->address.type != PLN_ADDR_TYPE_GRPH_STEREO ? 2 : 0, cache_cursor); } } From dbaf4bff0749b020b613680e2ca913be2142a807 Mon Sep 17 00:00:00 2001 From: Fudong Wang Date: Wed, 27 Jul 2022 12:01:29 +0800 Subject: [PATCH 52/79] drm/amd/display: clear optc underflow before turn off odm clock [Why] After ODM clock off, optc underflow bit will be kept there always and clear not work. We need to clear that before clock off. [How] Clear that if have when clock off. 
Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: Fudong Wang Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index ac8bc335bf1a9..0877ab143b98b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -479,6 +479,11 @@ void optc1_enable_optc_clock(struct timing_generator *optc, bool enable) OTG_CLOCK_ON, 1, 1, 1000); } else { + + //last chance to clear underflow, otherwise, it will always there due to clock is off. + if (optc->funcs->is_optc_underflow_occurred(optc) == true) + optc->funcs->clear_optc_underflow(optc); + REG_UPDATE_2(OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, 0, OTG_CLOCK_EN, 0); From e9a42c56cae1eae7c6bd9fc4a55fdd5407d0a831 Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Mon, 11 Jul 2022 18:37:41 -0400 Subject: [PATCH 53/79] drm/amd/display: Fix TDR eDP and USB4 display light up issue [Why] After TDR recovery, eDP and USB4 displays do not light up because dmub outbox notifications are not enabled after dmub reload and link encoder assignments for the streams are not cleared before dc state reset. [How] - Dmub outbox notification is enabled after tdr recovery by issuing inbox command to dmub. - Link encoders for the streams are unassigned before dc state reset.
Reviewed-by: Jimmy Kizito Reviewed-by: Jun Lei Acked-by: Tom Chung Signed-off-by: Meenakshikumar Somasundaram Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/core/dc.c | 1 + drivers/gpu/drm/amd/display/dc/dc_link.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 679edd473e1fe..cf99097887076 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -4380,6 +4380,7 @@ void dc_enable_dmub_outbox(struct dc *dc) struct dc_context *dc_ctx = dc->ctx; dmub_enable_outbox_notification(dc_ctx->dmub_srv); + DC_LOG_DC("%s: dmub outbox notifications enabled\n", __func__); } /** diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index a0af0f6afeef8..9544abf75e846 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -344,6 +344,7 @@ enum dc_detect_reason { DETECT_REASON_HPDRX, DETECT_REASON_FALLBACK, DETECT_REASON_RETRAIN, + DETECT_REASON_TDR, }; bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason); From 1ae8563f0f8d92b0c05fb60dcf03f325a4a2ef6c Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 21 Jul 2022 10:23:41 -0400 Subject: [PATCH 54/79] drm/amd/display: Add 16 lines margin for SubVP [Description] SUBVP_START_LINE must be aligned to 2 swaths, so add 16 lines of margin so the start line can be adjusted by up to 16 lines for alignment purposes in FW. 
Reviewed-by: Jun Lei Reviewed-by: Jun Lei Acked-by: Tom Chung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 5 ++++- 6 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 721a04fe85d95..d21916872b76e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -264,6 +264,7 @@ struct dc_caps { uint32_t cache_num_ways; uint16_t subvp_fw_processing_delay_us; uint16_t subvp_prefetch_end_to_mall_start_us; + uint8_t subvp_swath_height_margin_lines; // subvp start line must be aligned to 2 x swath height uint16_t subvp_pstate_allow_width_us; uint16_t subvp_vertical_int_margin_us; bool seamless_odm; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 2d61c2a91cee2..c8059c28ac494 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -275,8 +275,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst) union dmub_rb_cmd cmd = { 0 }; cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; - // TODO: Uncomment once FW headers are promoted - //cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER; + cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER; cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst; cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header); @@ -612,6 +611,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, main_timing->v_total - main_timing->v_front_porch - main_timing->v_addressable; 
pipe_data->pipe_config.subvp_data.mall_region_lines = phantom_timing->v_addressable; pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->pipe_idx; + pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param; // Prefetch lines is equal to VACTIVE + BP + VSYNC pipe_data->pipe_config.subvp_data.prefetch_lines = diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index ca04bf3660d52..bd189802c7902 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -282,7 +282,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = { .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, .enable_optc_clock = optc1_enable_optc_clock, .set_vrr_m_const = optc3_set_vrr_m_const, - .set_drr = optc31_set_drr, // TODO: Update to optc32_set_drr once FW headers are promoted + .set_drr = optc32_set_drr, .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc3_set_vtotal_min_max, .set_static_screen_control = optc1_set_static_screen_control, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index f3c8023d41b73..1a990e94070ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2061,6 +2061,7 @@ static bool dcn32_resource_construct( dc->caps.max_cab_allocation_bytes = 67108864; // 64MB = 1024 * 1024 * 64 dc->caps.subvp_fw_processing_delay_us = 15; dc->caps.subvp_prefetch_end_to_mall_start_us = 15; + dc->caps.subvp_swath_height_margin_lines = 16; dc->caps.subvp_pstate_allow_width_us = 20; dc->caps.subvp_vertical_int_margin_us = 30; diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 4406fe9c9f2a8..ef1eee2e54d9a 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -1667,6 +1667,7 @@ static bool dcn321_resource_construct( dc->caps.max_cab_allocation_bytes = 33554432; // 32MB = 1024 * 1024 * 32 dc->caps.subvp_fw_processing_delay_us = 15; dc->caps.subvp_prefetch_end_to_mall_start_us = 15; + dc->caps.subvp_swath_height_margin_lines = 16; dc->caps.subvp_pstate_allow_width_us = 20; dc->caps.max_slave_planes = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index ae45f52b9fe20..a51e74344698e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -473,8 +473,11 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, // DML calculation for MALL region doesn't take into account FW delay // and required pstate allow width for multi-display cases + /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned + * to 2 swaths (i.e. 
16 lines) + */ phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + - pstate_width_fw_delay_lines; + pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines; // For backporch of phantom pipe, use vstartup of the main pipe phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); From 334b2e11b2a3b56d5a92e05fc384f09e1f330f7c Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 25 Jul 2022 01:54:00 -0400 Subject: [PATCH 55/79] drm/amd/display: 3.2.197 This version brings along following fixes: - Move fpu file to dml folder - Fix CAB allocation for multiple displays - Add a variable to update FCLK - Fix TMDS 4K@60Hz YCbCr420 corruption issue - Fix MPC tree infinite loop in some case - Fix HDMI VSIF V3 blank screen issue - Add some documentation to the code - Allow alternate prefetch modes in DML for DCN32 - Fix VPG instancing for dcn314 HPO - Check correct bounds for stream encoder instances for DCN303 - Enable SubVP by default on DCN32 & DCN321 - Fix DTBCLK not correct for dcn314 - Fix Compile-time warning - Fix the stereo mode hang issue - Fix display light up issue Acked-by: Tom Chung Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index d21916872b76e..a41247a00c83f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.196" +#define DC_VER "3.2.197" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 370b778ebe8dd6591f99c02693b4589406de7b81 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 28 Jul 2022 17:44:44 +0800 Subject: [PATCH 56/79] drm/amd/pm: add 3715 softpptable support for SMU13.0.0 Add support for 3715 softpptable. 
Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Reviewed-by: Feifei Xu Change-Id: Iae7360ce853a6b5fde38025d528640c9b88fc54c --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 0370482dd52bc..cd159e2401472 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -229,6 +229,7 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu) /* * Temporary solution for SMU V13.0.0 with SCPM enabled: * - use 36831 signed pptable when pp_table_id is 3683 + * - use 37151 signed pptable when pp_table_id is 3715 * - use 36641 signed pptable when pp_table_id is 3664 or 0 * TODO: drop these when the pptable carried in vbios is ready. */ @@ -241,6 +242,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu) case 3683: pptable_id = 36831; break; + case 3715: + pptable_id = 37151; + break; default: dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id); return -EINVAL; @@ -478,7 +482,7 @@ int smu_v13_0_setup_pptable(struct smu_context *smu) /* * Temporary solution for SMU V13.0.0 with SCPM disabled: - * - use 3664 or 3683 on request + * - use 3664, 3683 or 3715 on request * - use 3664 when pptable_id is 0 * TODO: drop these when the pptable carried in vbios is ready. */ @@ -489,6 +493,7 @@ int smu_v13_0_setup_pptable(struct smu_context *smu) break; case 3664: case 3683: + case 3715: break; default: dev_err(adev->dev, "Unsupported pptable id %d\n", pptable_id); From aa5d32ca100002d4c5300f49a2067fca93ae060f Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 12 Jul 2022 10:53:59 -0400 Subject: [PATCH 57/79] drm/amdkfd: let the debugger specify entry size copies for snapshots Let the debugger specify the entry size for snapshot buffer copies. This way, snapshot entries can be expanded in the future without breaking the ABI.
Return the minimum entry_size supported after copy as the debugger may want to use it to inspect the saved data in a core file even if not useful on version check later in the future. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 ++- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 49 ++++++++++--------- drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +- .../amd/amdkfd/kfd_process_queue_manager.c | 23 ++++++--- include/uapi/linux/kfd_ioctl.h | 7 +-- 6 files changed, 55 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a1ba9e73c9c67..ec50d6e02710a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2700,7 +2700,8 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, r = pqm_get_queue_snapshot(&target->pqm, exception_mask, /* Clear mask */ (void __user *)args->ptr, - args->data1); + args->data1, + &args->data2); args->data1 = r < 0 ? 
0 : r; if (r > 0) @@ -2740,7 +2741,8 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, r = kfd_dbg_trap_device_snapshot(target, exception_mask, (void __user *) args->ptr, - &args->data1); + &args->data1, + &args->data2); break; case KFD_IOC_DBG_TRAP_RUNTIME_ENABLE: if (data1) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 6e8bcf42dc96d..c69ab584d1d25 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -916,23 +916,22 @@ int kfd_dbg_trap_query_exception_info(struct kfd_process *target, int kfd_dbg_trap_device_snapshot(struct kfd_process *target, uint64_t exception_clear_mask, void __user *user_info, - uint32_t *number_of_device_infos) + uint32_t *number_of_device_infos, + uint32_t *entry_size) { - int i; - struct kfd_dbg_device_info_entry device_info[MAX_GPU_INSTANCE]; - - if (!target) - return -EINVAL; + struct kfd_dbg_device_info_entry device_info = {0}; + uint32_t tmp_entry_size = *entry_size; + int i, r = 0; - if (!user_info || !number_of_device_infos) + if (!(target && user_info && number_of_device_infos && entry_size)) return -EINVAL; if (*number_of_device_infos < target->n_pdds) { - *number_of_device_infos = target->n_pdds; - return -ENOSPC; + r = -ENOSPC; + goto out; } - memset(device_info, 0, sizeof(device_info)); + *entry_size = min((size_t)entry_size, sizeof(device_info)); mutex_lock(&target->event_mutex); @@ -940,26 +939,32 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target, for (i = 0; i < target->n_pdds; i++) { struct kfd_process_device *pdd = target->pdds[i]; - device_info[i].gpu_id = pdd->dev->id; - device_info[i].exception_status = pdd->exception_status; - device_info[i].lds_base = pdd->lds_base; - device_info[i].lds_limit = pdd->lds_limit; - device_info[i].scratch_base = pdd->scratch_base; - device_info[i].scratch_limit = pdd->scratch_limit; - device_info[i].gpuvm_base = pdd->gpuvm_base; - device_info[i].gpuvm_limit = 
pdd->gpuvm_limit; + device_info.gpu_id = pdd->dev->id; + device_info.exception_status = pdd->exception_status; + device_info.lds_base = pdd->lds_base; + device_info.lds_limit = pdd->lds_limit; + device_info.scratch_base = pdd->scratch_base; + device_info.scratch_limit = pdd->scratch_limit; + device_info.gpuvm_base = pdd->gpuvm_base; + device_info.gpuvm_limit = pdd->gpuvm_limit; if (exception_clear_mask) pdd->exception_status &= ~exception_clear_mask; + + if (copy_to_user(user_info, &device_info, *entry_size)) { + r = -EFAULT; + break; + } + + user_info += tmp_entry_size; } + mutex_unlock(&target->event_mutex); - if (copy_to_user(user_info, device_info, - sizeof(device_info[0]) * target->n_pdds)) - return -EFAULT; +out: *number_of_device_infos = target->n_pdds; - return 0; + return r; } int kfd_dbg_runtime_enable(struct kfd_process *p, uint64_t r_debug, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h index 8a8de64845b46..3a7505507ddcc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h @@ -93,7 +93,8 @@ int kfd_dbg_trap_query_exception_info(struct kfd_process *target, int kfd_dbg_trap_device_snapshot(struct kfd_process *target, uint64_t exception_clear_mask, void __user *user_info, - uint32_t *number_of_device_infos); + uint32_t *number_of_device_infos, + uint32_t *entry_size); void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target, uint64_t exception_set_mask); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 76841a60efe67..e41c6ff956f2d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1435,8 +1435,9 @@ int pqm_get_wave_state(struct process_queue_manager *pqm, int pqm_get_queue_snapshot(struct process_queue_manager *pqm, uint64_t exception_clear_mask, - struct kfd_queue_snapshot_entry __user *buf, - int num_qss_entries); + void __user *buf, + int num_qss_entries, 
+ uint32_t *entry_size); int amdkfd_fence_wait_timeout(uint64_t *fence_addr, uint64_t fence_value, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index d7beb2fe72c24..df490ffcc505f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -583,12 +583,18 @@ int pqm_get_wave_state(struct process_queue_manager *pqm, int pqm_get_queue_snapshot(struct process_queue_manager *pqm, uint64_t exception_clear_mask, - struct kfd_queue_snapshot_entry __user *buf, - int num_qss_entries) + void __user *buf, + int num_qss_entries, + uint32_t *entry_size) { struct process_queue_node *pqn; - int r, qss_entry_count = 0; + uint32_t tmp_entry_size = *entry_size; + int qss_entry_count = 0; + if (!(*entry_size)) + return -EINVAL; + + *entry_size = min((size_t)entry_size, sizeof(struct kfd_queue_snapshot_entry)); mutex_lock(&pqm->process->event_mutex); list_for_each_entry(pqn, &pqm->queues, process_queue_list) { @@ -602,18 +608,19 @@ int pqm_get_queue_snapshot(struct process_queue_manager *pqm, set_queue_snapshot_entry(pqn->q->device->dqm, pqn->q, exception_clear_mask, &src); - r = copy_to_user(buf++, &src, sizeof(src)); - - if (r) { + if (copy_to_user(buf, &src, *entry_size)) { qss_entry_count = -EFAULT; - goto unlock; + break; } + + buf += tmp_entry_size; } qss_entry_count++; } -unlock: + mutex_unlock(&pqm->process->event_mutex); + return qss_entry_count; } diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index c368db1b834e6..9d0f70b8bd506 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -75,8 +75,9 @@ * 10.2 - Reserved * 10.3 - Pass context_save_restore_area size to user-space * 11.0 - Restrict HW mode ops access to trap activate + * 12.0 - Let the debugger specifiy entry sizes for snapshots */ -#define KFD_IOCTL_DBG_MAJOR_VERSION 11 +#define KFD_IOCTL_DBG_MAJOR_VERSION 12 
#define KFD_IOCTL_DBG_MINOR_VERSION 0 struct kfd_ioctl_get_version_args { @@ -477,7 +478,7 @@ struct kfd_runtime_info { * exception_mask: exception to clear on snapshot * ptr: user buffer (IN) * data1: number of queue snapshots (IN/OUT) - 0 for IN ignores buffer writes - * data2: unused + * data2: buffer entry size in bytes (IN/OUT) * data3: unused * data4: unused */ @@ -540,7 +541,7 @@ struct kfd_runtime_info { * exception_mask: exception to clear on snapshot * ptr: user buffer for 'struct kfd_dbg_device_info_entry' entries (IN) * data1: number of devices in snapshot (IN/OUT) - * data2: unused + * data2: buffer entry size in bytes (IN/OUT) * data3: unused * data4: unused */ From 8dd0814e8f2f0bf1c06afa1e4038f7b936e9ca9b Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 13 Jul 2022 12:51:12 -0400 Subject: [PATCH 58/79] drm/amdkfd: change watchpoint set/clear to per debug device Let the debugger set/clear watchpoints by targeted debug device for a more efficient watch on device global variables. 
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 31 ++- drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 304 +++++++++-------------- drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 +- include/uapi/linux/kfd_ioctl.h | 11 +- 5 files changed, 153 insertions(+), 204 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index ec50d6e02710a..847076b9a7b8a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2496,13 +2496,12 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, struct task_struct *thread = NULL; int r = 0, i; struct kfd_process *target = NULL; + struct kfd_process_device *pdd = NULL; struct pid *pid = NULL; uint32_t *user_array = NULL; uint32_t debug_trap_action; uint64_t exception_mask; - uint32_t data1; - uint32_t data2; - uint32_t data3; + uint32_t data1, data2, data3, data4; bool check_devices; bool need_user_array; uint32_t size_to_copy_to_user_array = 0; @@ -2513,6 +2512,7 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, data1 = args->data1; data2 = args->data2; data3 = args->data3; + data4 = args->data4; exception_mask = args->exception_mask; if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { @@ -2607,8 +2607,8 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, debug_trap_action == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE || debug_trap_action == KFD_IOC_DBG_TRAP_NODE_SUSPEND || debug_trap_action == KFD_IOC_DBG_TRAP_NODE_RESUME || - debug_trap_action == KFD_IOC_DBG_TRAP_SET_ADDRESS_WATCH || - debug_trap_action == KFD_IOC_DBG_TRAP_CLEAR_ADDRESS_WATCH || + debug_trap_action == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH || + debug_trap_action == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH || debug_trap_action == KFD_IOC_DBG_TRAP_SET_PRECISE_MEM_OPS)) { r = -EPERM; goto unlock_out; @@ -2631,6 +2631,19 @@ static int 
kfd_ioctl_dbg_set_debug_trap(struct file *filep, } } + if (debug_trap_action == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH || + debug_trap_action == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) { + uint32_t device_id = debug_trap_action == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ? + data4 : data2; + int user_gpu_id = kfd_process_get_user_gpu_id(target, device_id); + + pdd = kfd_process_device_data_by_id(target, user_gpu_id); + if (user_gpu_id == -EINVAL || !pdd) { + r = -ENODEV; + goto unlock_out; + } + } + switch (debug_trap_action) { case KFD_IOC_DBG_TRAP_ENABLE: switch (data1) { @@ -2711,11 +2724,11 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, args->data1 = KFD_IOCTL_DBG_MAJOR_VERSION; args->data2 = KFD_IOCTL_DBG_MINOR_VERSION; break; - case KFD_IOC_DBG_TRAP_CLEAR_ADDRESS_WATCH: - r = kfd_dbg_trap_clear_address_watch(target, data1); + case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH: + r = kfd_dbg_trap_clear_dev_address_watch(pdd, data1); break; - case KFD_IOC_DBG_TRAP_SET_ADDRESS_WATCH: - r = kfd_dbg_trap_set_address_watch(target, + case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: + r = kfd_dbg_trap_set_dev_address_watch(pdd, args->ptr, /* watch address */ data3, /* watch address mask */ &data1, /* watch id */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index c69ab584d1d25..5ab20f6dc2910 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -32,23 +32,12 @@ enum { }; /* - * A bitmask to indicate which watch points have been allocated. - * bit meaning: - * 0: unallocated/available - * 1: allocated/unavailable + * The spinlock protects the per device dev->alloc_watch_ids for multi-process access. + * The per-process per-device pdd->alloc_watch_ids is protected by the debug IOCTL + * process mutex. 
*/ -static uint32_t allocated_debug_watch_points = ~((1 << MAX_WATCH_ADDRESSES) - 1); static DEFINE_SPINLOCK(watch_points_lock); -/* Allocate and free watch point IDs for debugger */ -static int kfd_allocate_debug_watch_point(struct kfd_process *p, - uint64_t watch_address, - uint32_t watch_address_mask, - uint32_t *watch_point, - uint32_t watch_mode); -static int kfd_release_debug_watch_points(struct kfd_process *p, - uint32_t watch_point_bit_mask_to_free); - int kfd_dbg_ev_query_debug_event(struct kfd_process *process, unsigned int *queue_id, unsigned int *gpu_id, @@ -316,6 +305,121 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p, return 0; } +#define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1 +static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_id) { + int i; + + *watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID; + + spin_lock(&watch_points_lock); + + for (i = 0; i < MAX_WATCH_ADDRESSES; i++) { + /* device watchpoint in use so skip */ + if ((pdd->dev->alloc_watch_ids >> i) & 0x1) + continue; + + pdd->alloc_watch_ids |= 0x1 << i; + pdd->dev->alloc_watch_ids |= 0x1 << i; + *watch_id = i; + spin_unlock(&watch_points_lock); + return 0; + } + + spin_unlock(&watch_points_lock); + + return -ENOMEM; +} + +static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id) { + spin_lock(&watch_points_lock); + + /* process owns device watch point so safe to clear */ + if ((pdd->alloc_watch_ids >> watch_id) & 0x1) { + pdd->alloc_watch_ids &= ~(0x1 << watch_id); + pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id); + } + + spin_unlock(&watch_points_lock); +} + +static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id) +{ + bool owns_watch_id = false; + + spin_lock(&watch_points_lock); + owns_watch_id = watch_id < MAX_WATCH_ADDRESSES && ((pdd->alloc_watch_ids >> watch_id) & 0x1); + spin_unlock(&watch_points_lock); + + return owns_watch_id; +} + +int kfd_dbg_trap_clear_dev_address_watch(struct 
kfd_process_device *pdd, uint32_t watch_id) +{ + int r; + + if (!kfd_dbg_owns_dev_watch_id(pdd, watch_id)) + return -EINVAL; + + r = debug_lock_and_unmap(pdd->dev->dqm); + if (r) + return r; + + amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, false); + pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch( + pdd->dev->adev, + watch_id); + amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, true); + + r = debug_map_and_unlock(pdd->dev->dqm); + + kfd_dbg_clear_dev_watch_id(pdd, watch_id); + + return r; +} + +int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd, + uint64_t watch_address, + uint32_t watch_address_mask, + uint32_t *watch_id, + uint32_t watch_mode) +{ + int r = kfd_dbg_get_dev_watch_id(pdd, watch_id); + + if (r) + return r; + + r = debug_lock_and_unmap(pdd->dev->dqm); + if (r) { + kfd_dbg_clear_dev_watch_id(pdd, *watch_id); + return r; + } + + amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, false); + pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch( + pdd->dev->adev, + watch_address, + watch_address_mask, + *watch_id, + watch_mode, + pdd->dev->vm_info.last_vmid_kfd); + amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, true); + + r = debug_map_and_unlock(pdd->dev->dqm); + /* HWS is broken so no point in HW rollback but release the watchpoint anyways. 
*/ + if (r) + kfd_dbg_clear_dev_watch_id(pdd, *watch_id); + + return r; +} + +static void kfd_dbg_clear_process_address_watch(struct kfd_process *target) { + int i, j; + + for (i = 0; i < target->n_pdds; i++) + for (j = 0; j < MAX_WATCH_ADDRESSES; j++) + kfd_dbg_trap_clear_dev_address_watch(target->pdds[i], j); +} + /* kfd_dbg_trap_deactivate: * target: target process * unwind: If this is unwinding a failed kfd_dbg_trap_enable() @@ -330,9 +434,7 @@ static void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int if (!unwind) { cancel_work_sync(&target->debug_event_workarea); - kfd_release_debug_watch_points(target, - target->allocated_debug_watch_point_bitmask); - target->allocated_debug_watch_point_bitmask = 0; + kfd_dbg_clear_process_address_watch(target); kfd_dbg_trap_set_wave_launch_mode(target, 0); kfd_dbg_trap_set_precise_mem_ops(target, 0); } @@ -599,42 +701,6 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target, return r; } -int kfd_dbg_trap_clear_address_watch(struct kfd_process *target, - uint32_t watch_id) -{ - /* check that we own watch id */ - if (!((1<allocated_debug_watch_point_bitmask)) { - pr_debug("Trying to free a watch point we don't own\n"); - return -EINVAL; - } - kfd_release_debug_watch_points(target, 1<allocated_debug_watch_point_bitmask ^= (1<allocated_debug_watch_point_bitmask |= (1 << *watch_id); - - return 0; -} - int kfd_dbg_trap_set_precise_mem_ops(struct kfd_process *target, uint32_t enable) { @@ -662,138 +728,6 @@ int kfd_dbg_trap_set_precise_mem_ops(struct kfd_process *target, return 0; } -#define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1 -static int kfd_dbg_get_watchpoint_id(void) -{ - int i, watch_point_to_allocate = KFD_DEBUGGER_INVALID_WATCH_POINT_ID; - - spin_lock(&watch_points_lock); - - for (i = 0; i < MAX_WATCH_ADDRESSES; i++) { - if (!(allocated_debug_watch_points & (1<n_pdds; i++) { - struct kfd_process_device *pdd = p->pdds[i]; - - r = debug_lock_and_unmap(pdd->dev->dqm); - if (r) - break; - - 
amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, false); - pdd->watch_points[*watch_point] = pdd->dev->kfd2kgd->set_address_watch( - pdd->dev->adev, - watch_address, - watch_address_mask, - *watch_point, - watch_mode, - pdd->dev->vm_info.last_vmid_kfd); - amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, true); - - r = debug_map_and_unlock(pdd->dev->dqm); - if (r) - break; - } - - return r; -} - -static int kfd_dbg_free_watchpoint_id(uint32_t watch_point_bit_mask_to_free) -{ - spin_lock(&watch_points_lock); - if (~allocated_debug_watch_points & watch_point_bit_mask_to_free) { - pr_err("Tried to free a free watch point! " - "allocated_debug_watch_points == 0x%08x " - "watch_point_bit_mask_to_free = 0x%08x\n", - allocated_debug_watch_points, - watch_point_bit_mask_to_free); - spin_unlock(&watch_points_lock); - return -EFAULT; - } - - pr_debug("Freeing watchpoint bitmask :0x%08x\n", - watch_point_bit_mask_to_free); - allocated_debug_watch_points ^= watch_point_bit_mask_to_free; - spin_unlock(&watch_points_lock); - - return 0; -} - - -static int kfd_release_debug_watch_points(struct kfd_process *p, - uint32_t watch_point_bit_mask_to_free) -{ - int i, j; - int r = kfd_dbg_free_watchpoint_id(watch_point_bit_mask_to_free); - - if (r) - return r; - - for (i = 0; i < p->n_pdds; i++) { - struct kfd_process_device *pdd = p->pdds[i]; - - r = debug_lock_and_unmap(pdd->dev->dqm); - if (r) - break; - - amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, false); - for (j = 0; j < MAX_WATCH_ADDRESSES; j++) { - if ((1<watch_points[j] = - pdd->dev->kfd2kgd->clear_address_watch( - pdd->dev->adev, - j); - } - amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, true); - - r = debug_map_and_unlock(pdd->dev->dqm); - if (r) - break; - } - return r; -} - int kfd_dbg_trap_query_exception_info(struct kfd_process *target, uint32_t source_id, uint32_t exception_code, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h index 3a7505507ddcc..7a982db3a4c89 100644 --- 
a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h @@ -58,9 +58,9 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target, uint32_t *trap_mask_supported); int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target, uint8_t wave_launch_mode); -int kfd_dbg_trap_clear_address_watch(struct kfd_process *target, +int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd, uint32_t watch_id); -int kfd_dbg_trap_set_address_watch(struct kfd_process *target, +int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd, uint64_t watch_address, uint32_t watch_address_mask, uint32_t *watch_id, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index e41c6ff956f2d..5e5849d83ae47 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -367,6 +367,9 @@ struct kfd_dev { /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ struct dev_pagemap pgmap; + + /* Track per device allocated watch points. */ + uint32_t alloc_watch_ids; }; struct kfd_ipc_obj; @@ -882,6 +885,7 @@ struct kfd_process_device { uint32_t spi_dbg_override; uint32_t spi_dbg_launch_mode; uint32_t watch_points[4]; + uint32_t alloc_watch_ids; /* * If this process has been checkpointed before, then the user @@ -1006,9 +1010,6 @@ struct kfd_process { /* per-process-per device debug event fd file */ struct file *dbg_ev_file; - /* Allocated debug watch point IDs bitmask */ - uint32_t allocated_debug_watch_point_bitmask; - /* If the process is a kfd debugger, we need to know so we can clean * up at exit time. If a process enables debugging on itself, it does * its own clean-up, so we don't set the flag here. 
We track this by diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 9d0f70b8bd506..d247f1e8550a2 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -76,8 +76,9 @@ * 10.3 - Pass context_save_restore_area size to user-space * 11.0 - Restrict HW mode ops access to trap activate * 12.0 - Let the debugger specifiy entry sizes for snapshots + * 13.0 - Change address watch to set/clear per target device */ -#define KFD_IOCTL_DBG_MAJOR_VERSION 12 +#define KFD_IOCTL_DBG_MAJOR_VERSION 13 #define KFD_IOCTL_DBG_MINOR_VERSION 0 struct kfd_ioctl_get_version_args { @@ -498,11 +499,11 @@ struct kfd_runtime_info { * exception_mask: unused * ptr: unused * data1: watch ID - * data2: unused + * data2: device id * data3: unused * data4: unused */ -#define KFD_IOC_DBG_TRAP_CLEAR_ADDRESS_WATCH 8 +#define KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH 8 /* KFD_IOC_DBG_TRAP_SET_ADDRESS_WATCH: * exception_mask: unused @@ -510,9 +511,9 @@ struct kfd_runtime_info { * data1: Watch ID (OUT) * data2: watch_mode: 0=read, 1=nonread, 2=atomic, 3=all * data3: watch address mask - * data4: unused + * data4: device id */ -#define KFD_IOC_DBG_TRAP_SET_ADDRESS_WATCH 9 +#define KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH 9 /* KFD_IOC_DBG_TRAP_SET_PRECISE_MEM_OPS * exception_mask: unused From a85167b001317a9f61bf9227acc02c7b78002a91 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 8 Aug 2022 17:38:51 -0400 Subject: [PATCH 59/79] drm/amdkfd: Fix VRAM attachment Use kfd_mem_attach_vram_bo instead of kfd_mem_attach_dmabuf. 
Signed-off-by: Felix Kuehling Reviewed-by: Ramesh Errabolu --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 5559f16e5a19a..7f02641d3469e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -917,9 +917,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, if (ret) goto unwind; #ifdef AMDKCL_AMDGPU_DMABUF_OPS - /* Enable acces to GTT and VRAM BOs of peer devices */ - } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT || - mem->domain == AMDGPU_GEM_DOMAIN_VRAM) { + /* Enable acces to GTT BOs of peer devices */ + } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT) { attachment[i]->type = KFD_MEM_ATT_DMABUF; ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]); if (ret) From bed1005e5f1af5d3039d69d3c937a9360da653dc Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 8 Aug 2022 19:51:45 -0400 Subject: [PATCH 60/79] drm/amdkfd: Fix DEVICE_PRIVATE page leak on 5.18 commit 27674ef6c73f ("mm: remove the extra ZONE_DEVICE struct page refcount") removed an extra reference count for ZONE_DEVICE pages. This requires a corresponding driver change (which was part of that patch). For DKMS builds, conditionally get a page reference only on old kernels without this patch. CONFIG_DEV_PAGEMAP_OPS is a suitable indicator, because this option was removed by the patch, and was previously selected by CONFIG_DEVICE_PRIVATE. 
Signed-off-by: Felix Kuehling Reviewed-by: Alex Sierra --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index ad3d43eaa9be8..bbc9252807fce 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -231,7 +231,9 @@ svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn) VM_BUG_ON_PAGE(page_ref_count(page), page); init_page_count(page); #else +#if IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) get_page(page); +#endif #endif lock_page(page); } From 52a4f9a03c5a546d80758321d9f1edd720a7b3eb Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 9 Aug 2022 10:13:54 +0800 Subject: [PATCH 61/79] drm/amd/pm: skip pptable override for smu_v13_0_7 skip pptable override for smu_v13_0_7 secure boards only. Signed-off-by: Kenneth Feng Reviewed-by: Feifei Xu --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index cd159e2401472..e56ec06012ddc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -212,6 +212,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu) if (!adev->scpm_enabled) return 0; + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7)) + return 0; + /* override pptable_id from driver parameter */ if (amdgpu_smu_pptable_id >= 0) { pptable_id = amdgpu_smu_pptable_id; @@ -219,13 +222,6 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu) } else { pptable_id = smu->smu_table.boot_values.pp_table_id; - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7) && - pptable_id == 3667) - pptable_id = 36671; - - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 7) && - pptable_id == 3688) - pptable_id = 36881; /* * Temporary 
solution for SMU V13.0.0 with SCPM enabled: * - use 36831 signed pptable when pp_table_id is 3683 From e6c1a7fcc44c4c79b160f883c74669bd43692568 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Wed, 20 Apr 2022 09:37:56 -0400 Subject: [PATCH 62/79] drm/amdgpu: Avoid direct cast to amdgpu_ttm_tt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For typesafety, use container_of() instead of implicit cast from struct ttm_tt to struct amdgpu_ttm_tt. Reviewed-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 34 +++++++++++++------------ 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0daf112debeb7..6609b9177fc08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -714,6 +714,8 @@ struct amdgpu_ttm_tt { #endif /* HAVE_AMDKCL_HMM_MIRROR_ENABLED */ }; +#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) + #ifdef CONFIG_DRM_AMDGPU_USERPTR #ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED /* @@ -726,7 +728,7 @@ struct amdgpu_ttm_tt { int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { struct ttm_tt *ttm = bo->tbo.ttm; - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); unsigned long start = gtt->userptr; struct vm_area_struct *vma; struct mm_struct *mm; @@ -780,7 +782,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) */ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); bool r = false; if (!gtt || !gtt->userptr) @@ -961,7 +963,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct 
amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); enum dma_data_direction direction = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -998,7 +1000,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); enum dma_data_direction direction = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -1045,7 +1047,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); struct ttm_tt *ttm = tbo->ttm; - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (amdgpu_bo_encrypted(abo)) flags |= AMDGPU_PTE_TMZ; @@ -1083,7 +1085,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, struct ttm_resource *bo_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void*)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); uint64_t flags; int r; @@ -1153,7 +1155,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); struct ttm_placement placement; struct ttm_place placements; struct ttm_resource *tmp; @@ -1224,7 +1226,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); /* if the pages have userptr pinning then clear that first */ if (gtt->userptr) { @@ -1254,7 +1256,7 @@ static void 
amdgpu_ttm_backend_unbind(struct ttm_device *bdev, static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt->usertask) put_task_struct(gtt->usertask); @@ -1308,7 +1310,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, struct ttm_operation_ctx *ctx) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); pgoff_t i; int ret; @@ -1342,7 +1344,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); struct amdgpu_device *adev; pgoff_t i; @@ -1411,7 +1413,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, /* Set TTM_TT_FLAG_EXTERNAL before populate but after create. 
*/ bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL; - gtt = (void *)bo->ttm; + gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); gtt->userptr = addr; gtt->userflags = flags; @@ -1435,7 +1437,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, */ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL) return NULL; @@ -1455,7 +1457,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end, unsigned long *userptr) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1478,7 +1480,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, */ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL || !gtt->userptr) return false; @@ -1562,7 +1564,7 @@ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) */ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL) return false; From 6aa9a4e25ed3ba634f978f79642ebdd779002555 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 4 Aug 2022 18:19:38 -0400 Subject: [PATCH 63/79] drm/amdkfd: Handle restart of kfd_ioctl_wait_events When kfd_ioctl_wait_events needs to restart due to a signal, we need to update the timeout to account for the time already elapsed. We also need to undo auto_reset of events that have signaled already, so that the restarted ioctl will be able to count those signals again. This fixes infinite hangs when kfd_ioctl_wait_events is interrupted by a signal. 
Signed-off-by: Felix Kuehling Reviewed-and-tested-by: Xiaogang Chen --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 24 ++++++++++++------------ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 847076b9a7b8a..d7fb67b63f4bf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -884,7 +884,7 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, err = kfd_wait_on_events(p, args->num_events, (void __user *)args->events_ptr, (args->wait_for_all != 0), - args->timeout, &args->wait_result); + &args->timeout, &args->wait_result); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 655b6fca8c10c..651d10daa36d4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -906,7 +906,8 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms) return msecs_to_jiffies(user_timeout_ms) + 1; } -static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) +static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters, + bool undo_auto_reset) { uint32_t i; @@ -915,6 +916,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) spin_lock(&waiters[i].event->lock); remove_wait_queue(&waiters[i].event->wq, &waiters[i].wait); + if (undo_auto_reset && waiters[i].activated && + waiters[i].event && waiters[i].event->auto_reset) + set_event(waiters[i].event); spin_unlock(&waiters[i].event->lock); } @@ -923,7 +927,7 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) int kfd_wait_on_events(struct kfd_process *p, uint32_t num_events, void __user *data, - bool all, uint32_t user_timeout_ms, + bool all, uint32_t 
*user_timeout_ms, uint32_t *wait_result) { struct kfd_event_data __user *events = @@ -932,7 +936,7 @@ int kfd_wait_on_events(struct kfd_process *p, int ret = 0; struct kfd_event_waiter *event_waiters = NULL; - long timeout = user_timeout_to_jiffies(user_timeout_ms); + long timeout = user_timeout_to_jiffies(*user_timeout_ms); event_waiters = alloc_event_waiters(num_events); if (!event_waiters) { @@ -982,15 +986,11 @@ int kfd_wait_on_events(struct kfd_process *p, } if (signal_pending(current)) { - /* - * This is wrong when a nonzero, non-infinite timeout - * is specified. We need to use - * ERESTARTSYS_RESTARTBLOCK, but struct restart_block - * contains a union with data for each user and it's - * in generic kernel code that I don't want to - * touch yet. - */ ret = -ERESTARTSYS; + if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE && + *user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE) + *user_timeout_ms = jiffies_to_msecs( + max(0l, timeout-1)); break; } @@ -1031,7 +1031,7 @@ int kfd_wait_on_events(struct kfd_process *p, event_waiters, events); out_unlock: - free_waiters(num_events, event_waiters); + free_waiters(num_events, event_waiters, ret == -ERESTARTSYS); mutex_unlock(&p->event_mutex); out: if (ret) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 5e5849d83ae47..db2a8a070b695 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1533,7 +1533,7 @@ void kfd_event_free_process(struct kfd_process *p); int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); int kfd_wait_on_events(struct kfd_process *p, uint32_t num_events, void __user *data, - bool all, uint32_t user_timeout_ms, + bool all, uint32_t *user_timeout_ms, uint32_t *wait_result); void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, uint32_t valid_id_bits); From 39ce939d9eaaa0f897e35ea0979c6537e51fd8bc Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Fri, 29 Jul 2022 18:37:27 +0800 
Subject: [PATCH 64/79] drm/amdgpu: enable GFX Power Gating for GC IP v11.0.1 Enable GFX Power Gating control for GC IP v11.0.1. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 38 ++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index e03618803a1c2..158d87e6805d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -53,6 +53,7 @@ #define GFX11_MEC_HPD_SIZE 2048 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388 #define regCGTT_WD_CLK_CTRL 0x5086 #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 @@ -5279,6 +5280,38 @@ static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { .update_spm_vmid = gfx_v11_0_update_spm_vmid, }; +static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) +{ + u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); + + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + + WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); + + // Program RLC_PG_DELAY3 for CGPG hysteresis + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(11, 0, 1): + WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); + break; + default: + break; + } + } +} + +static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) +{ + amdgpu_gfx_rlc_enter_safe_mode(adev); + + gfx_v11_cntl_power_gating(adev, enable); + + amdgpu_gfx_rlc_exit_safe_mode(adev); +} + static int gfx_v11_0_set_powergating_state(void *handle, enum amd_powergating_state state) { @@ -5293,6 +5326,11 @@ static int gfx_v11_0_set_powergating_state(void *handle, case IP_VERSION(11, 0, 2): amdgpu_gfx_off_ctrl(adev, enable); break; + case IP_VERSION(11, 0, 1): 
+ gfx_v11_cntl_pg(adev, enable); + /* TODO: Enable this when GFXOFF is ready */ + // amdgpu_gfx_off_ctrl(adev, enable); + break; default: break; } From 967ede045562461435ef664d42ce47cc8de76c24 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Fri, 29 Jul 2022 15:25:57 +0800 Subject: [PATCH 65/79] drm/amdgpu: add GFX Power Gating support for GC IP v11.0.1 Add AMD_PG_SUPPORT_GFX_PG support. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index b700c6cb14b48..bbbf760f8ad2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -595,6 +595,7 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = + AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x1; break; From 726d6333eec3cdb20324d0dee0f1423ea1f1d5d4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 9 Aug 2022 17:10:52 -0400 Subject: [PATCH 66/79] drm/amdkfd: Remove useless #ifdefs Both branches are exactly the same, so the #ifdefs are no longer needed. 
Fixes: 90fbc2dc692e ("drm/amdkcl: cleanup kcl_bitmap_xxx") CC: Flora Cui Signed-off-by: Felix Kuehling Reviewed-by: Flora Cui Reviewed-by: Leslie Shi --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 --------- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 9 --------- 2 files changed, 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 79d609c0732f5..43697b3e4c9c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1028,11 +1028,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base) free_pages((unsigned long)pdd->qpd.cwsr_kaddr, get_order(KFD_CWSR_TBA_TMA_SIZE)); -#if defined(HAVE_BITMAP_FUNCS) bitmap_free(pdd->qpd.doorbell_bitmap); -#else - bitmap_free(pdd->qpd.doorbell_bitmap); -#endif idr_destroy(&pdd->alloc_idr); mutex_destroy(&pdd->qpd.doorbell_lock); @@ -1548,13 +1544,8 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, if (!KFD_IS_SOC15(dev)) return 0; -#if defined(HAVE_BITMAP_FUNCS) qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, GFP_KERNEL); -#else - qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, - GFP_KERNEL); -#endif if (!qpd->doorbell_bitmap) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index df490ffcc505f..35a1dfe7fe64b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -151,13 +151,8 @@ void kfd_process_dequeue_from_all_devices(struct kfd_process *p) int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) { INIT_LIST_HEAD(&pqm->queues); -#if defined(HAVE_BITMAP_FUNCS) pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, GFP_KERNEL); -#else - pqm->queue_slot_bitmap = 
bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, - GFP_KERNEL); -#endif if (!pqm->queue_slot_bitmap) return -ENOMEM; pqm->process = p; @@ -179,11 +174,7 @@ void pqm_uninit(struct process_queue_manager *pqm) kfree(pqn); } -#if defined(HAVE_BITMAP_FUNCS) bitmap_free(pqm->queue_slot_bitmap); -#else - bitmap_free(pqm->queue_slot_bitmap); -#endif pqm->queue_slot_bitmap = NULL; } From 9c3054129fa1ef57887449841926bb342deedc2f Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 9 Aug 2022 17:15:41 -0400 Subject: [PATCH 67/79] drm/amdkfd: Remove unused variable This must have been introduced by a rebasing error on amd-staging-dkms-*. Signed-off-by: Felix Kuehling Reviewed-by: Leslie Shi --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 55f923aca648f..4b9d2a15fb085 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1287,7 +1287,6 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, dma_addr_t *dma_addr, struct amdgpu_device *bo_adev, struct dma_fence **fence, bool flush_tlb) { - uint64_t vram_base_offset = 0; struct amdgpu_device *adev = pdd->dev->adev; struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); uint64_t pte_flags; @@ -1301,9 +1300,6 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms, last_start, last_start + npages - 1, readonly); - if (prange->svm_bo && prange->ttm_res) - vram_base_offset = bo_adev->vm_manager.vram_base_offset; - for (i = offset; i < offset + npages; i++) { last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN; dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN; From b8f3cdccd0741f712800cb06adee2dddcf89b135 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Thu, 21 Jul 2022 12:55:34 -0400 Subject: [PATCH 68/79] drm/amd/display: consider DSC pass-through during mode 
validation Add a mode validation routine for DSC pass-through. Both the link from source to hub, and the link from hub to monitor are checked, according to the current link training result and full pbn returned by enum path resource sideband message. Pick up the minimum value as the bandwidth bottleneck for the end to end link bandwidth constraint, and check if the maximum DSC decompression bandwidth can fit. Co-authored-by: Fangzhi Zuo Signed-off-by: Hamza Mahfooz --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 79 +++++++++++++++++-- 1 file changed, 74 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 47835cec76850..1b9ccce0a78e4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -36,6 +36,7 @@ #include "dm_helpers.h" #include "dc_link_ddc.h" +#include "dc_link_dp.h" #include "ddc_service_types.h" #include "dpcd_defs.h" @@ -1569,17 +1570,85 @@ bool pre_validate_dsc(struct drm_atomic_state *state, #endif +static unsigned int kbps_from_pbn(unsigned int pbn) +{ + unsigned int kbps = pbn; + + kbps *= (1000000 / PEAK_FACTOR_X1000); + kbps *= 8; + kbps *= 54; + kbps /= 64; + + return kbps; +} + +static bool is_dsc_common_config_possible(struct dc_stream_state *stream, + struct dc_dsc_bw_range *bw_range) +{ + struct dc_dsc_policy dsc_policy = {0}; + + dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy); + dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], + stream->sink->ctx->dc->debug.dsc_min_slice_height_override, + dsc_policy.min_target_bpp * 16, + dsc_policy.max_target_bpp * 16, + &stream->sink->dsc_caps.dsc_dec_caps, + &stream->timing, bw_range); + + return bw_range->max_target_bpp_x16 && bw_range->min_target_bpp_x16; +} + enum dc_status dm_dp_mst_is_port_support_mode( struct amdgpu_dm_connector *aconnector, struct 
dc_stream_state *stream) { + struct dc_link_settings cur_link_settings; + unsigned int end_to_end_bw_in_kbps = 0; + unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; + unsigned int max_compressed_bw_in_kbps = 0; + struct dc_dsc_bw_range bw_range = {0}; int bpp, pbn, branch_max_throughput_mps = 0; - /* check if mode could be supported within fUll_pbn */ - bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; - pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); - if (pbn > aconnector->port->full_pbn) - return DC_FAIL_BANDWIDTH_VALIDATE; + /* + * check if the mode could be supported if DSC pass-through is supported + * AND check if there enough bandwidth available to support the mode + * with DSC enabled. + */ + if (is_dsc_common_config_possible(stream, &bw_range) && + aconnector->port->passthrough_aux) { + mutex_lock(&aconnector->mst_mgr.lock); + + cur_link_settings = stream->link->verified_link_cap; + + upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, + &cur_link_settings + ); + down_link_bw_in_kbps = kbps_from_pbn(aconnector->port->full_pbn); + + /* pick the bottleneck */ + end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, + down_link_bw_in_kbps); + + mutex_unlock(&aconnector->mst_mgr.lock); + + /* + * use the maximum dsc compression bandwidth as the required + * bandwidth for the mode + */ + max_compressed_bw_in_kbps = bw_range.min_kbps; + + if (end_to_end_bw_in_kbps < max_compressed_bw_in_kbps) { + DRM_DEBUG_DRIVER("Mode does not fit into DSC pass-through bandwidth validation\n"); + return DC_FAIL_BANDWIDTH_VALIDATE; + } + } else { + /* check if mode could be supported within full_pbn */ + bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; + pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); + + if (pbn > aconnector->port->full_pbn) + return DC_FAIL_BANDWIDTH_VALIDATE; + } /* check is mst dsc output bandwidth 
branch_overall_throughput_0_mps */ switch (stream->timing.pixel_encoding) { From 99d08a5d1ad7fb76b33aabae46cd88bc7e6e6df4 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Mon, 18 Jul 2022 16:50:45 -0400 Subject: [PATCH 69/79] drm/amd/display: implement DSC pass-through support Currently, we only attempt to setup DSC at the virtual DPCD port, however many modern displays come with DSC support and MST hubs can now support DSC pass-through. So, to more optimally make use of the available bandwidth, use DSC pass-through when possible by adding DSC pass-through enablement support into the DSC enable sequence. Signed-off-by: Hamza Mahfooz --- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 45 +++++++++++++++++-- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 62d115fd02396..24bd6b469a00e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -818,8 +818,14 @@ bool dm_helpers_dp_write_dsc_enable( const struct dc_stream_state *stream, bool enable) { - uint8_t enable_dsc = enable ? 1 : 0; + static const uint8_t DSC_DISABLE; + static const uint8_t DSC_DECODING = 0x01; + static const uint8_t DSC_PASSTHROUGH = 0x02; + struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_port *port; + uint8_t enable_dsc = enable ? DSC_DECODING : DSC_DISABLE; + uint8_t enable_passthrough = enable ? DSC_PASSTHROUGH : DSC_DISABLE; uint8_t ret = 0; if (!stream) @@ -839,8 +845,39 @@ bool dm_helpers_dp_write_dsc_enable( aconnector->dsc_aux, stream, enable_dsc); #endif - ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); - DC_LOG_DC("Send DSC %s to MST RX\n", enable_dsc ? 
"enable" : "disable"); + port = aconnector->port; + + if (enable) { + if (port->passthrough_aux) { + ret = drm_dp_dpcd_write(port->passthrough_aux, + DP_DSC_ENABLE, + &enable_passthrough, 1); + DC_LOG_DC("Sent DSC pass-through enable to virtual dpcd port, ret = %u\n", + ret); + } + + ret = drm_dp_dpcd_write(aconnector->dsc_aux, + DP_DSC_ENABLE, &enable_dsc, 1); + DC_LOG_DC("Sent DSC decoding enable to %s port, ret = %u\n", + (port->passthrough_aux) ? "remote RX" : + "virtual dpcd", + ret); + } else { + ret = drm_dp_dpcd_write(aconnector->dsc_aux, + DP_DSC_ENABLE, &enable_dsc, 1); + DC_LOG_DC("Sent DSC decoding disable to %s port, ret = %u\n", + (port->passthrough_aux) ? "remote RX" : + "virtual dpcd", + ret); + + if (port->passthrough_aux) { + ret = drm_dp_dpcd_write(port->passthrough_aux, + DP_DSC_ENABLE, + &enable_passthrough, 1); + DC_LOG_DC("Sent DSC pass-through disable to virtual dpcd port, ret = %u\n", + ret); + } + } } if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_EDP) { @@ -857,7 +894,7 @@ bool dm_helpers_dp_write_dsc_enable( #endif } - return (ret > 0); + return ret; } #endif From 92af6f6883033229e92e3f83d7cd5fa0a35b3bd8 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Mon, 8 Aug 2022 16:22:42 -0400 Subject: [PATCH 70/79] drm/amd/display: include soc._clock_tmp[] into DC's scratch region Currently, we are using soc._clock_tmp[] to temporarily store and modify data from soc.clock_limits[] before copying it back into soc.clock_limits[] (because modifying data directly in soc.clock_limits[] causes unintended behavior). However, this approach has a number of downsides, such as: 1. struct _vcs_dpi_soc_bounding_box_st's creation/destruction aren't well defined (which could mean more unintended behavior). 2. Throwing "temp" varibles in structs everywhere doesn't make for a particularly readable codebase. 
For these reasons, we should get rid of soc._clock_tmp[] by defining a struct scratch within struct dc that contains all of the temporary variables (including _clock_tmp[]) such that it is obvious how they are intended to be used. Co-authored-by: Leo Li Signed-off-by: Hamza Mahfooz --- drivers/gpu/drm/amd/display/dc/dc.h | 11 +++ .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 34 ++++---- .../amd/display/dc/dml/dcn301/dcn301_fpu.c | 40 +++++----- .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 77 ++++++++++--------- .../amd/display/dc/dml/display_mode_structs.h | 6 -- 5 files changed, 91 insertions(+), 77 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index a41247a00c83f..6621f608b5a98 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -901,6 +901,17 @@ struct dc { uint32_t *dcn_reg_offsets; uint32_t *nbio_reg_offsets; + + /* Scratch memory */ + struct { + struct { + /* + * For matching clock_limits table in driver with table + * from PMFW. 
+ */ + struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; + } update_bw_bounding_box; + } scratch; }; enum frame_buffer_mode { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index d34e0f1314d91..d680f1c5b69f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -2234,6 +2234,7 @@ static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct clk_li void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { + struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool); struct clk_limit_table *clk_table = &bw_params->clk_table; unsigned int i, closest_clk_lvl = 0, k = 0; @@ -2247,8 +2248,7 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params ASSERT(clk_table->num_entries); /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */ - memcpy(&dcn2_1_soc._clock_tmp, &dcn2_1_soc.clock_limits, - sizeof(dcn2_1_soc.clock_limits)); + memcpy(s, dcn2_1_soc.clock_limits, sizeof(dcn2_1_soc.clock_limits)); for (i = 0; i < clk_table->num_entries; i++) { /* loop backwards*/ @@ -2263,25 +2263,25 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params if (i == 1) k++; - dcn2_1_soc._clock_tmp[k].state = k; - dcn2_1_soc._clock_tmp[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - dcn2_1_soc._clock_tmp[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn2_1_soc._clock_tmp[k].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn2_1_soc._clock_tmp[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; - - dcn2_1_soc._clock_tmp[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn2_1_soc._clock_tmp[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - 
dcn2_1_soc._clock_tmp[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn2_1_soc._clock_tmp[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn2_1_soc._clock_tmp[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn2_1_soc._clock_tmp[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn2_1_soc._clock_tmp[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + s[k].state = k; + s[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + s[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + s[k].socclk_mhz = clk_table->entries[i].socclk_mhz; + s[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; + + s[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + s[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + s[k].dram_bw_per_chan_gbps = + dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + s[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + s[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + s[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + s[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; k++; } - memcpy(&dcn2_1_soc.clock_limits, &dcn2_1_soc._clock_tmp, - sizeof(dcn2_1_soc.clock_limits)); + memcpy(dcn2_1_soc.clock_limits, s, sizeof(dcn2_1_soc.clock_limits)); if (clk_table->num_entries) { dcn2_1_soc.num_states = clk_table->num_entries + 1; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index d211cf6d234c7..241d28d0b7fb1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -322,6 +322,7 @@ static void calculate_wm_set_for_vlevel(int vlevel, void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { + struct 
_vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct dcn301_resource_pool *pool = TO_DCN301_RES_POOL(dc->res_pool); struct clk_limit_table *clk_table = &bw_params->clk_table; unsigned int i, closest_clk_lvl; @@ -329,8 +330,7 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dc_assert_fp_enabled(); - memcpy(&dcn3_01_soc._clock_tmp, &dcn3_01_soc.clock_limits, - sizeof(dcn3_01_soc.clock_limits)); + memcpy(s, dcn3_01_soc.clock_limits, sizeof(dcn3_01_soc.clock_limits)); /* Default clock levels are used for diags, which may lead to overclocking. */ if (!IS_DIAG_DC(dc->ctx->dce_environment)) { @@ -348,31 +348,33 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param } } - dcn3_01_soc._clock_tmp[i].state = i; - dcn3_01_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - dcn3_01_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn3_01_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn3_01_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; - - dcn3_01_soc._clock_tmp[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn3_01_soc._clock_tmp[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - dcn3_01_soc._clock_tmp[i].dram_bw_per_chan_gbps = dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn3_01_soc._clock_tmp[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn3_01_soc._clock_tmp[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn3_01_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn3_01_soc._clock_tmp[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + s[i].state = i; + s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + s[i].socclk_mhz = clk_table->entries[i].socclk_mhz; 
+ s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; + + s[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + s[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + s[i].dram_bw_per_chan_gbps = + dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + s[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + s[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + s[i].phyclk_d18_mhz = + dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + s[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz; } if (clk_table->num_entries) { dcn3_01_soc.num_states = clk_table->num_entries; /* duplicate last level */ - dcn3_01_soc._clock_tmp[dcn3_01_soc.num_states] = dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1]; - dcn3_01_soc._clock_tmp[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states; + s[dcn3_01_soc.num_states] = + dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1]; + s[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states; } } - memcpy(&dcn3_01_soc.clock_limits, &dcn3_01_soc._clock_tmp, - sizeof(dcn3_01_soc.clock_limits)); + memcpy(dcn3_01_soc.clock_limits, s, sizeof(dcn3_01_soc.clock_limits)); dcn3_01_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 149a1b17cdf3f..0e62eb823e343 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -597,14 +597,14 @@ void dcn31_calculate_wm_and_dlg_fp( void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { + struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct clk_limit_table *clk_table = &bw_params->clk_table; unsigned int i, 
closest_clk_lvl; int j; dc_assert_fp_enabled(); - memcpy(&dcn3_1_soc._clock_tmp, &dcn3_1_soc.clock_limits, - sizeof(dcn3_1_soc.clock_limits)); + memcpy(s, dcn3_1_soc.clock_limits, sizeof(dcn3_1_soc.clock_limits)); // Default clock levels are used for diags, which may lead to overclocking. if (!IS_DIAG_DC(dc->ctx->dce_environment)) { @@ -633,34 +633,36 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params } } - dcn3_1_soc._clock_tmp[i].state = i; + s[i].state = i; /* Clocks dependent on voltage level. */ - dcn3_1_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - dcn3_1_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn3_1_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn3_1_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + s[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * + 2 * clk_table->entries[i].wck_ratio; /* Clocks independent of voltage level. */ - dcn3_1_soc._clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn3_1_soc._clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + s[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - dcn3_1_soc._clock_tmp[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn3_1_soc._clock_tmp[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn3_1_soc._clock_tmp[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn3_1_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn3_1_soc._clock_tmp[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + s[i].dram_bw_per_chan_gbps = + dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + s[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + s[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + s[i].phyclk_d18_mhz = + dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + s[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; } if (clk_table->num_entries) { dcn3_1_soc.num_states = clk_table->num_entries; } } - memcpy(&dcn3_1_soc.clock_limits, &dcn3_1_soc._clock_tmp, - sizeof(dcn3_1_soc.clock_limits)); + memcpy(dcn3_1_soc.clock_limits, s, sizeof(dcn3_1_soc.clock_limits)); dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; @@ -727,6 +729,7 @@ void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { + struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct clk_limit_table *clk_table = &bw_params->clk_table; unsigned int i, closest_clk_lvl; int max_dispclk_mhz = 0, max_dppclk_mhz = 0; @@ -734,8 +737,7 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dc_assert_fp_enabled(); - memcpy(&dcn3_16_soc._clock_tmp, &dcn3_16_soc.clock_limits, - 
sizeof(dcn3_16_soc.clock_limits)); + memcpy(s, dcn3_16_soc.clock_limits, sizeof(dcn3_16_soc.clock_limits)); // Default clock levels are used for diags, which may lead to overclocking. if (!IS_DIAG_DC(dc->ctx->dce_environment)) { @@ -757,7 +759,8 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param for (i = 0; i < clk_table->num_entries; i++) { /* loop backwards*/ for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) { - if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= + clk_table->entries[i].dcfclk_mhz) { closest_clk_lvl = j; break; } @@ -768,39 +771,43 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param closest_clk_lvl = dcn3_16_soc.num_states - 1; } - dcn3_16_soc._clock_tmp[i].state = i; + s[i].state = i; /* Clocks dependent on voltage level. */ - dcn3_16_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; if (clk_table->num_entries == 1 && - dcn3_16_soc._clock_tmp[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { + s[i].dcfclk_mhz < + dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { /*SMU fix not released yet*/ - dcn3_16_soc._clock_tmp[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; + s[i].dcfclk_mhz = + dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; } - dcn3_16_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn3_16_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn3_16_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + s[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * + 2 * clk_table->entries[i].wck_ratio; /* Clocks independent of voltage 
level. */ - dcn3_16_soc._clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn3_16_soc._clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - dcn3_16_soc._clock_tmp[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn3_16_soc._clock_tmp[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn3_16_soc._clock_tmp[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn3_16_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn3_16_soc._clock_tmp[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + s[i].dram_bw_per_chan_gbps = + dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + s[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + s[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + s[i].phyclk_d18_mhz = + dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + s[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; } if (clk_table->num_entries) { dcn3_16_soc.num_states = clk_table->num_entries; } } - memcpy(&dcn3_16_soc.clock_limits, &dcn3_16_soc._clock_tmp, - sizeof(dcn3_16_soc.clock_limits)); + memcpy(dcn3_16_soc.clock_limits, s, sizeof(dcn3_16_soc.clock_limits)); if (max_dispclk_mhz) { dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 8538588e77545..c596187a1e096 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -179,12 +179,6 @@ struct _vcs_dpi_voltage_scaling_st { */ 
struct _vcs_dpi_soc_bounding_box_st { struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; - /* - * This is a temporary stash for updating @clock_limits with the PMFW - * clock table. Do not use outside of *update_bw_boudning_box functions. - */ - struct _vcs_dpi_voltage_scaling_st _clock_tmp[DC__VOLTAGE_STATES]; - /** * @num_states: It represents the total of Display Power Management * (DPM) supported by the specific ASIC. From 8649c4adc31154f1602750ddc9e7370015494c7b Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Wed, 3 Aug 2022 17:19:58 +0800 Subject: [PATCH 71/79] drm/amd/pm: Fix a potential gpu_metrics_table memory leak Memory is allocated for gpu_metrics_table in smu_v13_0_4_init_smc_tables(), but not freed in smu_v13_0_4_fini_smc_tables(). This may cause memory leaks, fix it. Reviewed-by: Evan Quan Signed-off-by: Zhen Ni Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 64665e8a86e4d..97e1d55dcaad5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -198,6 +198,9 @@ static int smu_v13_0_4_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } From 45f9351cc9d109b7005079e320d04af1e9cdea4d Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Wed, 3 Aug 2022 17:19:59 +0800 Subject: [PATCH 72/79] drm/amd/pm: Fix a potential gpu_metrics_table memory leak Memory is allocated for gpu_metrics_table in smu_v13_0_5_init_smc_tables(), but not freed in smu_v13_0_5_fini_smc_tables(). This may cause memory leaks, fix it. 
Reviewed-by: Evan Quan Signed-off-by: Zhen Ni Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c index 47360ef5c1758..66445964efbd1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c @@ -176,6 +176,9 @@ static int smu_v13_0_5_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } From 71d3ae01f14151fb07ae52e162b0be5a2e838253 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 4 Aug 2022 15:13:49 -0100 Subject: [PATCH 73/79] drm/amd/display: set panel orientation before drm_dev_register To set the panel orientation property with quirk, we need the mode size provided by EDID. This info is available after EDID is read by dc_link_detect() and updated by amdgpu_dm_update_connector_after_detect(). 
The detection happens at driver load in amdgpu_dm_initialize_drm_device() and, therefore, we can get modes and set panel orientation before drm_dev_register() to avoid DRM warns on creating the connector property after device registration: [ 2.563969] ------------[ cut here ]------------ [ 2.563971] WARNING: CPU: 6 PID: 325 at drivers/gpu/drm/drm_mode_object.c:45 drm_mode_object_add+0x72/0x80 [drm] [ 2.563997] Modules linked in: btusb btrtl btbcm btintel btmtk bluetooth rfkill ecdh_generic ecc usbhid crc16 amdgpu(+) drm_ttm_helper ttm agpgart gpu_sched i2c_algo_bit drm_display_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm serio_raw sdhci_pci atkbd libps2 cqhci vivaldi_fmap ccp sdhci i8042 crct10dif_pclmul crc32_pclmul hid_multitouch ghash_clmulni_intel aesni_intel crypto_simd cryptd wdat_wdt mmc_core cec xhci_pci sp5100_tco rng_core xhci_pci_renesas serio 8250_dw i2c_hid_acpi i2c_hid btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq dm_mirror dm_region_hash dm_log dm_mod pkcs8_key_parser crypto_user [ 2.564032] CPU: 6 PID: 325 Comm: systemd-udevd Not tainted 5.18.0-amd-staging-drm-next+ #67 [ 2.564034] Hardware name: Valve Jupiter/Jupiter, BIOS F7A0105 03/21/2022 [ 2.564036] RIP: 0010:drm_mode_object_add+0x72/0x80 [drm] [ 2.564053] Code: f0 89 c3 85 c0 78 07 89 45 00 44 89 65 04 4c 89 ef e8 e2 99 04 f1 31 c0 85 db 0f 4e c3 5b 5d 41 5c 41 5d c3 80 7f 50 00 74 ac <0f> 0b eb a8 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 54 4c [ 2.564055] RSP: 0018:ffffb2e880413860 EFLAGS: 00010202 [ 2.564056] RAX: ffffffffc0ba1440 RBX: ffff99508a860010 RCX: 0000000000000001 [ 2.564057] RDX: 00000000b0b0b0b0 RSI: ffff99508c050110 RDI: ffff99508a860010 [ 2.564058] RBP: ffff99508c050110 R08: 0000000000000020 R09: ffff99508c292c20 [ 2.564059] R10: 0000000000000000 R11: ffff99508c0507d8 R12: 00000000b0b0b0b0 [ 2.564060] R13: 0000000000000004 R14: ffffffffc068a4b6 R15: ffffffffc068a47f [ 2.564061] FS: 00007fc69b5f1a40(0000) 
GS:ffff9953aff80000(0000) knlGS:0000000000000000 [ 2.564063] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2.564063] CR2: 00007f9506804000 CR3: 0000000107f92000 CR4: 0000000000350ee0 [ 2.564065] Call Trace: [ 2.564068] [ 2.564070] drm_property_create+0xc9/0x170 [drm] [ 2.564088] drm_property_create_enum+0x1f/0x70 [drm] [ 2.564105] drm_connector_set_panel_orientation_with_quirk+0x96/0xc0 [drm] [ 2.564123] get_modes+0x4fb/0x530 [amdgpu] [ 2.564378] drm_helper_probe_single_connector_modes+0x1ad/0x850 [drm_kms_helper] [ 2.564390] drm_client_modeset_probe+0x229/0x1400 [drm] [ 2.564411] ? xas_store+0x52/0x5e0 [ 2.564416] ? kmem_cache_alloc_trace+0x177/0x2c0 [ 2.564420] __drm_fb_helper_initial_config_and_unlock+0x44/0x4e0 [drm_kms_helper] [ 2.564430] drm_fbdev_client_hotplug+0x173/0x210 [drm_kms_helper] [ 2.564438] drm_fbdev_generic_setup+0xa5/0x166 [drm_kms_helper] [ 2.564446] amdgpu_pci_probe+0x35e/0x370 [amdgpu] [ 2.564621] local_pci_probe+0x45/0x80 [ 2.564625] ? pci_match_device+0xd7/0x130 [ 2.564627] pci_device_probe+0xbf/0x220 [ 2.564629] ? sysfs_do_create_link_sd+0x69/0xd0 [ 2.564633] really_probe+0x19c/0x380 [ 2.564637] __driver_probe_device+0xfe/0x180 [ 2.564639] driver_probe_device+0x1e/0x90 [ 2.564641] __driver_attach+0xc0/0x1c0 [ 2.564643] ? __device_attach_driver+0xe0/0xe0 [ 2.564644] ? __device_attach_driver+0xe0/0xe0 [ 2.564646] bus_for_each_dev+0x78/0xc0 [ 2.564648] bus_add_driver+0x149/0x1e0 [ 2.564650] driver_register+0x8f/0xe0 [ 2.564652] ? 0xffffffffc1023000 [ 2.564654] do_one_initcall+0x44/0x200 [ 2.564657] ? 
kmem_cache_alloc_trace+0x177/0x2c0 [ 2.564659] do_init_module+0x4c/0x250 [ 2.564663] __do_sys_init_module+0x12e/0x1b0 [ 2.564666] do_syscall_64+0x3b/0x90 [ 2.564670] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 2.564673] RIP: 0033:0x7fc69bff232e [ 2.564674] Code: 48 8b 0d 45 0b 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 af 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 12 0b 0c 00 f7 d8 64 89 01 48 [ 2.564676] RSP: 002b:00007ffe872ba3e8 EFLAGS: 00000246 ORIG_RAX: 00000000000000af [ 2.564677] RAX: ffffffffffffffda RBX: 000055873f797820 RCX: 00007fc69bff232e [ 2.564678] RDX: 000055873f7bf390 RSI: 0000000001155e81 RDI: 00007fc699e4d010 [ 2.564679] RBP: 00007fc699e4d010 R08: 000055873f7bfe20 R09: 0000000001155e90 [ 2.564680] R10: 000000055873f7bf R11: 0000000000000246 R12: 000055873f7bf390 [ 2.564681] R13: 000000000000000d R14: 000055873f7c4cb0 R15: 000055873f797820 [ 2.564683] [ 2.564683] ---[ end trace 0000000000000000 ]--- [ 2.564696] ------------[ cut here ]------------ [ 2.564696] WARNING: CPU: 6 PID: 325 at drivers/gpu/drm/drm_mode_object.c:242 drm_object_attach_property+0x52/0x80 [drm] [ 2.564717] Modules linked in: btusb btrtl btbcm btintel btmtk bluetooth rfkill ecdh_generic ecc usbhid crc16 amdgpu(+) drm_ttm_helper ttm agpgart gpu_sched i2c_algo_bit drm_display_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm serio_raw sdhci_pci atkbd libps2 cqhci vivaldi_fmap ccp sdhci i8042 crct10dif_pclmul crc32_pclmul hid_multitouch ghash_clmulni_intel aesni_intel crypto_simd cryptd wdat_wdt mmc_core cec xhci_pci sp5100_tco rng_core xhci_pci_renesas serio 8250_dw i2c_hid_acpi i2c_hid btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq dm_mirror dm_region_hash dm_log dm_mod pkcs8_key_parser crypto_user [ 2.564738] CPU: 6 PID: 325 Comm: systemd-udevd Tainted: G W 5.18.0-amd-staging-drm-next+ #67 [ 2.564740] Hardware name: Valve Jupiter/Jupiter, BIOS F7A0105 03/21/2022 [ 
2.564741] RIP: 0010:drm_object_attach_property+0x52/0x80 [drm] [ 2.564759] Code: 2d 83 f8 18 74 33 48 89 74 c1 08 48 8b 4f 08 48 89 94 c1 c8 00 00 00 48 8b 47 08 83 00 01 c3 4d 85 d2 75 dd 83 7f 58 01 75 d7 <0f> 0b eb d3 41 80 78 50 00 74 cc 0f 0b eb c8 44 89 ce 48 c7 c7 28 [ 2.564760] RSP: 0018:ffffb2e8804138d8 EFLAGS: 00010246 [ 2.564761] RAX: 0000000000000010 RBX: ffff99508c1a2000 RCX: ffff99508c1a2180 [ 2.564762] RDX: 0000000000000003 RSI: ffff99508c050100 RDI: ffff99508c1a2040 [ 2.564763] RBP: 00000000ffffffff R08: ffff99508a860010 R09: 00000000c0c0c0c0 [ 2.564763] R10: 0000000000000000 R11: 0000000000000020 R12: ffff99508a860010 [ 2.564764] R13: ffff995088733008 R14: ffff99508c1a2000 R15: ffffffffc068a47f [ 2.564765] FS: 00007fc69b5f1a40(0000) GS:ffff9953aff80000(0000) knlGS:0000000000000000 [ 2.564766] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2.564767] CR2: 00007f9506804000 CR3: 0000000107f92000 CR4: 0000000000350ee0 [ 2.564768] Call Trace: [ 2.564769] [ 2.564770] drm_connector_set_panel_orientation_with_quirk+0x4a/0xc0 [drm] [ 2.564789] get_modes+0x4fb/0x530 [amdgpu] [ 2.565024] drm_helper_probe_single_connector_modes+0x1ad/0x850 [drm_kms_helper] [ 2.565036] drm_client_modeset_probe+0x229/0x1400 [drm] [ 2.565056] ? xas_store+0x52/0x5e0 [ 2.565060] ? kmem_cache_alloc_trace+0x177/0x2c0 [ 2.565062] __drm_fb_helper_initial_config_and_unlock+0x44/0x4e0 [drm_kms_helper] [ 2.565072] drm_fbdev_client_hotplug+0x173/0x210 [drm_kms_helper] [ 2.565080] drm_fbdev_generic_setup+0xa5/0x166 [drm_kms_helper] [ 2.565088] amdgpu_pci_probe+0x35e/0x370 [amdgpu] [ 2.565261] local_pci_probe+0x45/0x80 [ 2.565263] ? pci_match_device+0xd7/0x130 [ 2.565265] pci_device_probe+0xbf/0x220 [ 2.565267] ? sysfs_do_create_link_sd+0x69/0xd0 [ 2.565268] really_probe+0x19c/0x380 [ 2.565270] __driver_probe_device+0xfe/0x180 [ 2.565272] driver_probe_device+0x1e/0x90 [ 2.565274] __driver_attach+0xc0/0x1c0 [ 2.565276] ? __device_attach_driver+0xe0/0xe0 [ 2.565278] ? 
__device_attach_driver+0xe0/0xe0 [ 2.565279] bus_for_each_dev+0x78/0xc0 [ 2.565281] bus_add_driver+0x149/0x1e0 [ 2.565283] driver_register+0x8f/0xe0 [ 2.565285] ? 0xffffffffc1023000 [ 2.565286] do_one_initcall+0x44/0x200 [ 2.565288] ? kmem_cache_alloc_trace+0x177/0x2c0 [ 2.565290] do_init_module+0x4c/0x250 [ 2.565291] __do_sys_init_module+0x12e/0x1b0 [ 2.565294] do_syscall_64+0x3b/0x90 [ 2.565296] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 2.565297] RIP: 0033:0x7fc69bff232e [ 2.565298] Code: 48 8b 0d 45 0b 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 49 89 ca b8 af 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 12 0b 0c 00 f7 d8 64 89 01 48 [ 2.565299] RSP: 002b:00007ffe872ba3e8 EFLAGS: 00000246 ORIG_RAX: 00000000000000af [ 2.565301] RAX: ffffffffffffffda RBX: 000055873f797820 RCX: 00007fc69bff232e [ 2.565302] RDX: 000055873f7bf390 RSI: 0000000001155e81 RDI: 00007fc699e4d010 [ 2.565303] RBP: 00007fc699e4d010 R08: 000055873f7bfe20 R09: 0000000001155e90 [ 2.565303] R10: 000000055873f7bf R11: 0000000000000246 R12: 000055873f7bf390 [ 2.565304] R13: 000000000000000d R14: 000055873f7c4cb0 R15: 000055873f797820 [ 2.565306] [ 2.565307] ---[ end trace 0000000000000000 ]--- -- v2: - call amdgpu_dm_connector_get_modes() instead of ddc_get_modes() (Harry) Fixes: d77de7880e0e0 ("amd/display: enable panel orientation quirks") Acked-by: Hans de Goede Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3ee96e36423c0..781020d6b9376 100755 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4380,6 +4380,7 @@ static void register_backlight_device(struct amdgpu_display_manager *dm, } } +static void amdgpu_set_panel_orientation(struct 
drm_connector *connector); /* * In this architecture, the association @@ -4573,6 +4574,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) adev_to_drm(adev)->vblank_disable_immediate = false; } } + amdgpu_set_panel_orientation(&aconnector->base); } /* Software is initialized. Now we can register interrupt handlers. */ @@ -7263,6 +7265,10 @@ static void amdgpu_set_panel_orientation(struct drm_connector *connector) connector->connector_type != DRM_MODE_CONNECTOR_LVDS) return; + mutex_lock(&connector->dev->mode_config.mutex); + amdgpu_dm_connector_get_modes(connector); + mutex_unlock(&connector->dev->mode_config.mutex); + encoder = amdgpu_dm_connector_to_encoder(connector); if (!encoder) return; @@ -7307,8 +7313,6 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector, * restored here. */ amdgpu_dm_update_freesync_caps(connector, edid); - - amdgpu_set_panel_orientation(connector); } else { amdgpu_dm_connector->num_modes = 0; } From 8e59f128e9d30009998c777817697815ee078cc9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 9 Aug 2022 11:44:05 -0400 Subject: [PATCH 74/79] drm/amdgpu: Only disable prefer_shadow on hawaii We changed it for all asics due to a hibernation regression on hawaii, but the workaround breaks suspend on a polaris12. Just disable it for hawaii. 
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216119 Fixes: 3a4b1cc28fbd ("drm/amdgpu/display: disable prefer_shadow for generic fb helpers") Reviewed-and-tested-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Asher Song --- drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 3 +-- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 7 +++++-- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++-- 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 6c0ea34cc75e0..84a815389885b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -652,8 +652,7 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = YRES_MAX; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 9c038b037a284..3072c18221ad0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2888,8 +2888,7 @@ static int dce_v10_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; #ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index f7553590141d7..3d124b6648213 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3006,8 +3006,7 @@ static int dce_v11_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; #ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 2f5383ae0b3b6..f794839e5ecc4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2773,8 +2773,7 @@ static int dce_v6_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_width = 16384; adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; #ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index adf6f2507e39d..d787144c8bd35 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2784,8 +2784,11 @@ static int dce_v8_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + if (adev->asic_type == CHIP_HAWAII) + /* disable prefer shadow for now due to hibernation issues */ + 
adev_to_drm(adev)->mode_config.prefer_shadow = 0; + else + adev_to_drm(adev)->mode_config.prefer_shadow = 1; #ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 781020d6b9376..5232aca9f3710 100755 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4017,8 +4017,11 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + if (adev->asic_type == CHIP_HAWAII) + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; + else + adev_to_drm(adev)->mode_config.prefer_shadow = 1; /* indicates support for immediate flip */ adev_to_drm(adev)->mode_config.async_page_flip = true; From 49c1266fb43742c8a4583ca682306ad93e66acd6 Mon Sep 17 00:00:00 2001 From: Sebin Sebastian Date: Sat, 30 Jul 2022 09:16:58 +0530 Subject: [PATCH 75/79] drm/amdgpu: double free error and freeing uninitialized null pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a double free and an uninitialized pointer read error. Both tmp and new point at the same address and both are freed, which leads to a double free. Checking in the error_free label that tmp and new differ before freeing tmp fixes the double free issue. new is also not initialized to NULL, which leads to a free of an uninitialized pointer. Reviewed-by: André Almeida Suggested-by: S. 
Amaranath Signed-off-by: Sebin Sebastian Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 06d8d7a13296d..27670770b384e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1752,7 +1752,7 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; char reg_offset[11]; - uint32_t *new, *tmp = NULL; + uint32_t *new = NULL, *tmp = NULL; int ret, i = 0, len = 0; do { @@ -1799,7 +1799,8 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, ret = size; error_free: - kfree(tmp); + if (tmp != new) + kfree(tmp); kfree(new); return ret; } From fb87ce84e41dec8d6e711b626ff35ca66f43c309 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 3 Aug 2022 16:13:56 +0800 Subject: [PATCH 76/79] drm/amdgpu: disable 3DCGCG/CGLS temporarily due to stability issue Some stability issues were reported with these features. 
Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Change-Id: I5fccae63bd530d6ff9e4167bb1c44319830806f9 --- drivers/gpu/drm/amd/amdgpu/soc21.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index bbbf760f8ad2b..77d549dbe2a8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -546,8 +546,10 @@ static int soc21_common_early_init(void *handle) case IP_VERSION(11, 0, 0): adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | +#if 0 AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS | +#endif AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_REPEATER_FGCG | AMD_CG_SUPPORT_GFX_FGCG | From 81a5ce49a642ce50f11167a367b89a4c45de2033 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 10 Aug 2022 11:08:31 +0800 Subject: [PATCH 77/79] drm/amd/pm: add missing ->fini_microcode interface for Sienna Cichlid To avoid any potential memory leak. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Change-Id: Ide94e85d862c10aae93e04e3ce64c0451e5e1570 --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index c58c504645665..2765ebf8720a8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -4335,6 +4335,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .dump_pptable = sienna_cichlid_dump_pptable, .init_microcode = smu_v11_0_init_microcode, .load_microcode = smu_v11_0_load_microcode, + .fini_microcode = smu_v11_0_fini_microcode, .init_smc_tables = sienna_cichlid_init_smc_tables, .fini_smc_tables = smu_v11_0_fini_smc_tables, .init_power = smu_v11_0_init_power, From 88ba986cf4643551af08b2b8b537af1ecbbbd730 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 10 Aug 2022 11:11:16 +0800 Subject: [PATCH 
78/79] drm/amd/pm: add missing ->fini_xxxx interfaces for some SMU13 asics Without these, potential memory leak may be induced. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Change-Id: Ied7cd204d44d739dece11774c9d29e7a192fd341 --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 1bbeceeb9e3cb..df4a47acd7247 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1792,7 +1792,9 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .dump_pptable = smu_v13_0_0_dump_pptable, .init_microcode = smu_v13_0_init_microcode, .load_microcode = smu_v13_0_load_microcode, + .fini_microcode = smu_v13_0_fini_microcode, .init_smc_tables = smu_v13_0_0_init_smc_tables, + .fini_smc_tables = smu_v13_0_fini_smc_tables, .init_power = smu_v13_0_init_power, .fini_power = smu_v13_0_fini_power, .check_fw_status = smu_v13_0_check_fw_status, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 9dd56e73218be..d78375d9a1414 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1574,7 +1574,9 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .dump_pptable = smu_v13_0_7_dump_pptable, .init_microcode = smu_v13_0_init_microcode, .load_microcode = smu_v13_0_load_microcode, + .fini_microcode = smu_v13_0_fini_microcode, .init_smc_tables = smu_v13_0_7_init_smc_tables, + .fini_smc_tables = smu_v13_0_fini_smc_tables, .init_power = smu_v13_0_init_power, .fini_power = smu_v13_0_fini_power, .check_fw_status = smu_v13_0_7_check_fw_status, From 41d69d420c84c25a4a734031d5f7f2d61061fe02 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 
8 Aug 2022 20:16:39 -0400 Subject: [PATCH 79/79] drm/amd/dkms: Bump package version to 5.18.0 DKMS packages built from staging should have version 5.18, not 5.16. Signed-off-by: Felix Kuehling Reviewed-by: Hawking Zhang Reviewed-by: Leslie Shi --- drivers/gpu/drm/amd/dkms/configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/dkms/configure.ac b/drivers/gpu/drm/amd/dkms/configure.ac index 3feca735c2140..eada36c6a9f53 100644 --- a/drivers/gpu/drm/amd/dkms/configure.ac +++ b/drivers/gpu/drm/amd/dkms/configure.ac @@ -1,4 +1,4 @@ -AC_INIT(amdgpu-dkms, 5.16.0) +AC_INIT(amdgpu-dkms, 5.18.0) AC_LANG(C) AC_CONFIG_AUX_DIR([config])