From 0b15205c7325dc20b7da0068307670d222d66949 Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Tue, 12 Jul 2022 16:33:10 -0400 Subject: [PATCH 001/134] drm/amdgpu: limiting AV1 to first instance on VCN4 decode AV1 is only supported on first instance. Signed-off-by: Sonny Jiang Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 131 ++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 84ac2401895a4..a91ffbf902d48 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "soc15_hw_ip.h" @@ -44,6 +45,9 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define RDECODE_MSG_CREATE 0x00000000 +#define RDECODE_MESSAGE_CREATE 0x00000001 + static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 @@ -1323,6 +1327,132 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring) } } +static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p) +{ + struct drm_gpu_scheduler **scheds; + + /* The create msg must be in the first IB submitted */ + if (atomic_read(&p->entity->fence_seq)) + return -EINVAL; + + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] + [AMDGPU_RING_PRIO_0].sched; + drm_sched_entity_modify_sched(p->entity, scheds, 1); + return 0; +} + +static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va_mapping *map; + uint32_t *msg, num_buffers; + struct amdgpu_bo *bo; + uint64_t start, end; + unsigned int i; + void *ptr; + int r; + + addr &= AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, addr, &bo, &map); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); + return r; + } + + start = map->start * AMDGPU_GPU_PAGE_SIZE; + end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); + return r; + } + + r = amdgpu_bo_kmap(bo, &ptr); + if (r) { + DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); + return r; + } + + msg = ptr + addr - start; + + /* Check length */ + if (msg[1] > end - addr) { + r = -EINVAL; + goto out; + } + + if (msg[3] != RDECODE_MSG_CREATE) + goto out; + + num_buffers = msg[2]; + for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { + uint32_t offset, size, *create; + + if (msg[0] != RDECODE_MESSAGE_CREATE) + continue; + + offset = msg[1]; + size = msg[2]; + + if (offset + size > end) { + r = -EINVAL; + goto out; + } + + create = ptr + addr + offset - start; + + /* H246, HEVC and VP9 can run on any instance */ + if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) + continue; + + r = vcn_v4_0_limit_sched(p); + if (r) + goto out; + } + +out: + amdgpu_bo_kunmap(bo); + return r; +} + +#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) + +static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + struct amdgpu_ring *ring = 
to_amdgpu_ring(p->entity->rq->sched);
+	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
+	uint32_t val;
+	int r = 0;
+
+	/* The first instance can decode anything */
+	if (!ring->me)
+		return r;
+
+	/* unified queue ib header has 8 double words. */
+	if (ib->length_dw < 8)
+		return r;
+
+	val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE
+
+	if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
+		decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
+
+		if (decode_buffer->valid_buf_flag & 0x1)
+			r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+				decode_buffer->msg_buffer_address_lo);
+	}
+	return r;
+}
+
 static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_VCN_ENC,
 	.align_mask = 0x3f,
@@ -1331,6 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
 	.get_rptr = vcn_v4_0_unified_ring_get_rptr,
 	.get_wptr = vcn_v4_0_unified_ring_get_wptr,
 	.set_wptr = vcn_v4_0_unified_ring_set_wptr,
+	.patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place,
 	.emit_frame_size =
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +

From f6a3f66063ca39e7ee5fcee59e889c5ec4de9dc0 Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky
Date: Wed, 13 Jul 2022 12:57:50 -0400
Subject: [PATCH 002/134] drm/amdgpu: Get rid of amdgpu_job->external_hw_fence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a follow-up cleanup to [1]. See below the refcount balancing for
calling amdgpu_job_submit_direct after this cleanup, as far as I calculated.

amdgpu_fence_emit
	dma_fence_init                                   1
	dma_fence_get(fence)                             2
	rcu_assign_pointer(*ptr, dma_fence_get(fence)    3

---> amdgpu_job_submit_direct completes before fence signaled

amdgpu_sa_bo_free
	(*sa_bo)->fence = dma_fence_get(fence)           4

amdgpu_job_free
	dma_fence_put                                    3

amdgpu_vcn_enc_get_destroy_msg
	*fence = dma_fence_get(f)                        4
	dma_fence_put(f);                                3

amdgpu_vcn_enc_ring_test_ib
	dma_fence_put(fence)                             2

amdgpu_fence_process
	dma_fence_put                                    1

amdgpu_sa_bo_remove_locked
	dma_fence_put                                    0

---> amdgpu_job_submit_direct completes after fence signaled

amdgpu_fence_process
	dma_fence_put                                    2

amdgpu_job_free
	dma_fence_put                                    1

amdgpu_vcn_enc_get_destroy_msg
	*fence = dma_fence_get(f)                        2
	dma_fence_put(f);                                1

amdgpu_vcn_enc_ring_test_ib
	dma_fence_put(fence)                             0

[1] - https://patchwork.kernel.org/project/dri-devel/cover/20220624180955.485440-1-andrey.grodzovsky@amd.com/

Signed-off-by: Andrey Grodzovsky
Suggested-by: Christian König
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 27 ++++------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h    |  1 -
 3 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e1c9587f659b5..041bd906449db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5230,8 +5230,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	 *
 	 * job->base holds a reference to parent fence
 	 */
-	if (job && (job->hw_fence.ops != NULL) &&
-	    dma_fence_is_signaled(&job->hw_fence)) {
+	if (job && dma_fence_is_signaled(&job->hw_fence)) {
 		job_signaled = true;
 		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
 		goto skip_hw_reset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 36c1be77bf8fa..5071b96be9824 100644
--- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -133,16 +133,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) { struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct dma_fence *f; - struct dma_fence *hw_fence; unsigned i; - if (job->hw_fence.ops == NULL) - hw_fence = job->external_hw_fence; - else - hw_fence = &job->hw_fence; - /* use sched fence if available */ - f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence; + f = job->base.s_fence ? &job->base.s_fence->finished : &job->hw_fence; for (i = 0; i < job->num_ibs; ++i) amdgpu_ib_free(ring->adev, &job->ibs[i], f); } @@ -156,11 +150,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else - kfree(job); + dma_fence_put(&job->hw_fence); } void amdgpu_job_free(struct amdgpu_job *job) @@ -169,11 +159,7 @@ void amdgpu_job_free(struct amdgpu_job *job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - /* only put the hw fence if has embedded fence */ - if (job->hw_fence.ops != NULL) - dma_fence_put(&job->hw_fence); - else - kfree(job); + dma_fence_put(&job->hw_fence); } int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, @@ -203,15 +189,12 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, int r; job->base.sched = &ring->sched; - r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence); - /* record external_hw_fence for direct submit */ - job->external_hw_fence = dma_fence_get(*fence); + r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence); + if (r) return r; amdgpu_job_free(job); - dma_fence_put(*fence); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index d599c0540b46f..babc0af751c2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -50,7 +50,6 @@ struct amdgpu_job { struct amdgpu_sync sync; struct amdgpu_sync sched_sync; struct dma_fence hw_fence; - struct dma_fence *external_hw_fence; uint32_t preamble_status; uint32_t preemption_status; bool vm_needs_flush; From 75510fac07cdde23c9217c8299b6cd64c689fb2b Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 4 Jul 2022 17:16:38 +0800 Subject: [PATCH 003/134] drm/amdgpu: add umc v8_10_0 ip headers Add umc v8_10_0 register offset and shift masks header files Signed-off-by: YiPeng Chai Reviewed-by: Alexander Deucher Reviewed-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- .../include/asic_reg/umc/umc_8_10_0_offset.h | 33 +++++++ .../include/asic_reg/umc/umc_8_10_0_sh_mask.h | 94 +++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h new file mode 100644 index 0000000000000..b798cf5a2c39c --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_offset.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _umc_8_10_0_OFFSET_HEADER +#define _umc_8_10_0_OFFSET_HEADER + +#define regUMCCH0_0_GeccErrCntSel 0x0328 +#define regUMCCH0_0_GeccErrCntSel_BASE_IDX 2 +#define regUMCCH0_0_GeccErrCnt 0x0329 +#define regUMCCH0_0_GeccErrCnt_BASE_IDX 2 +#define regMCA_UMC_UMC0_MCUMC_STATUST0 0x03c2 +#define regMCA_UMC_UMC0_MCUMC_STATUST0_BASE_IDX 2 +#define regMCA_UMC_UMC0_MCUMC_ADDRT0 0x03c4 +#define regMCA_UMC_UMC0_MCUMC_ADDRT0_BASE_IDX 2 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h new file mode 100644 index 0000000000000..bd99b431247f3 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_10_0_sh_mask.h @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _umc_8_10_0_SH_MASK_HEADER +#define _umc_8_10_0_SH_MASK_HEADER + +//UMCCH0_0_GeccErrCntSel +#define UMCCH0_0_GeccErrCntSel__GeccErrInt__SHIFT 0xc +#define UMCCH0_0_GeccErrCntSel__GeccErrCntEn__SHIFT 0xf +#define UMCCH0_0_GeccErrCntSel__PoisonCntEn__SHIFT 0x10 +#define UMCCH0_0_GeccErrCntSel__GeccErrInt_MASK 0x00003000L +#define UMCCH0_0_GeccErrCntSel__GeccErrCntEn_MASK 0x00008000L +#define UMCCH0_0_GeccErrCntSel__PoisonCntEn_MASK 0x00030000L +//UMCCH0_0_GeccErrCnt +#define UMCCH0_0_GeccErrCnt__GeccErrCnt__SHIFT 0x0 +#define UMCCH0_0_GeccErrCnt__GeccUnCorrErrCnt__SHIFT 0x10 +#define UMCCH0_0_GeccErrCnt__GeccErrCnt_MASK 0x0000FFFFL +#define UMCCH0_0_GeccErrCnt__GeccUnCorrErrCnt_MASK 0xFFFF0000L +//MCA_UMC_UMC0_MCUMC_STATUST0 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt__SHIFT 0x10 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV22__SHIFT 0x16 +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrLsb__SHIFT 0x18 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV30__SHIFT 0x1e +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId__SHIFT 0x20 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV38__SHIFT 0x26 +#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub__SHIFT 0x28 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV41__SHIFT 0x29 +#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison__SHIFT 0x2b +#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred__SHIFT 0x2c +#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC__SHIFT 0x2d +#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC__SHIFT 0x2e +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV47__SHIFT 0x2f +#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent__SHIFT 0x34 +#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV__SHIFT 0x35 +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV54__SHIFT 0x36 +#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC__SHIFT 0x37 +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC__SHIFT 0x39 +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV__SHIFT 0x3a +#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV__SHIFT 0x3b +#define MCA_UMC_UMC0_MCUMC_STATUST0__En__SHIFT 0x3c +#define MCA_UMC_UMC0_MCUMC_STATUST0__UC__SHIFT 0x3d +#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow__SHIFT 0x3e +#define MCA_UMC_UMC0_MCUMC_STATUST0__Val__SHIFT 0x3f +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCode_MASK 0x000000000000FFFFL +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrorCodeExt_MASK 0x00000000003F0000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV22_MASK 0x0000000000C00000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrLsb_MASK 0x000000003F000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV30_MASK 0x00000000C0000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreId_MASK 0x0000003F00000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV38_MASK 0x000000C000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Scrub_MASK 0x0000010000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV41_MASK 0x0000060000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Poison_MASK 0x0000080000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Deferred_MASK 0x0000100000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__UECC_MASK 0x0000200000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__CECC_MASK 0x0000400000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV47_MASK 0x000F800000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Transparent_MASK 0x0010000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__SyndV_MASK 0x0020000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__RESERV54_MASK 0x0040000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__TCC_MASK 0x0080000000000000L +#define 
MCA_UMC_UMC0_MCUMC_STATUST0__ErrCoreIdVal_MASK 0x0100000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__PCC_MASK 0x0200000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__AddrV_MASK 0x0400000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__MiscV_MASK 0x0800000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__En_MASK 0x1000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__UC_MASK 0x2000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Overflow_MASK 0x4000000000000000L +#define MCA_UMC_UMC0_MCUMC_STATUST0__Val_MASK 0x8000000000000000L +//MCA_UMC_UMC0_MCUMC_ADDRT0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr__SHIFT 0x0 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__Reserved__SHIFT 0x38 +#define MCA_UMC_UMC0_MCUMC_ADDRT0__ErrorAddr_MASK 0x00FFFFFFFFFFFFFFL + +#endif From e4b1edf48fa37cf4f5ca403e384731fe28d13691 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Mon, 4 Jul 2022 17:18:14 +0800 Subject: [PATCH 004/134] drm/amdgpu: add umc ras functions for umc v8_10_0 1. Support query umc ras error counter. 2. Support ras umc ue error address remapping. Signed-off-by: YiPeng Chai Reviewed-by: Alexander Deucher Reviewed-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 10 + drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 27 +- drivers/gpu/drm/amd/amdgpu/umc_v8_10.c | 357 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/umc_v8_10.h | 70 +++++ 5 files changed, 464 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/umc_v8_10.c create mode 100644 drivers/gpu/drm/amd/amdgpu/umc_v8_10.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index a87e42c2c8dc8..c7d0cd15b5ef6 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -93,7 +93,7 @@ amdgpu-y += \ # add UMC block amdgpu-y += \ - umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o + umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o # add IH block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 2ec6698aa1fe5..3629d8f292ef9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -41,6 +41,12 @@ #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) +#define LOOP_UMC_NODE_INST(node_inst) \ + for ((node_inst) = 0; (node_inst) < adev->umc.node_inst_num; (node_inst)++) + +#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ + LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) + struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); @@ -62,6 +68,10 @@ struct amdgpu_umc { uint32_t channel_inst_num; /* number of umc instance with memory map register access */ uint32_t umc_inst_num; + + /*number of umc node instance with memory map register access*/ + uint32_t node_inst_num; + /* UMC regiser per channel offset */ uint32_t channel_offs; /* channel index table of interleaved memory */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index edbdc0b934ea4..503e40a903191 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -25,7 +25,7 @@ #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v11_0.h" -#include 
"umc_v8_7.h" +#include "umc_v8_10.h" #include "athub/athub_3_0_0_sh_mask.h" #include "athub/athub_3_0_0_offset.h" #include "oss/osssys_6_0_0_offset.h" @@ -537,11 +537,36 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) { switch (adev->ip_versions[UMC_HWIP][0]) { case IP_VERSION(8, 10, 0): + adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM; + adev->umc.node_inst_num = adev->gmc.num_umc; + adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET; + adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0]; + adev->umc.ras = &umc_v8_10_ras; + break; case IP_VERSION(8, 11, 0): break; default: break; } + + if (adev->umc.ras) { + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); + + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->umc.ras->ras_block.ras_late_init) + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + /* If not define special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; + } } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c new file mode 100644 index 0000000000000..36a2053f2e8b9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -0,0 +1,357 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "umc_v8_10.h" +#include "amdgpu_ras.h" +#include "amdgpu_umc.h" +#include "amdgpu.h" +#include "umc/umc_8_10_0_offset.h" +#include "umc/umc_8_10_0_sh_mask.h" + +#define UMC_8_NODE_DIST 0x800000 +#define UMC_8_INST_DIST 0x4000 + +struct channelnum_map_colbit { + uint32_t channel_num; + uint32_t col_bit; +}; + +const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = { + {24, 13}, + {20, 13}, + {16, 12}, + {14, 12}, + {12, 12}, + {10, 12}, + {6, 11}, +}; + +const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM] = { + {{16, 18}, {17, 19}}, + {{15, 11}, {3, 7}}, + {{1, 5}, {13, 9}}, + {{23, 21}, {22, 20}}, + {{0, 4}, {12, 8}}, + {{14, 10}, {2, 6}} + }; + +static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev, + uint32_t node_inst, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst + + UMC_8_NODE_DIST * node_inst; +} + +static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + /* clear error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_clear_error_count(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_clear_error_count_per_channel(adev, + umc_reg_offset); + } +} + +static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + /* UMC 8_10 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) - + UMC_V8_10_CE_CNT_INIT); + + /* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + /* Check the MCUMC_STATUS. 
*/ + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v8_10_query_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + } + + umc_v8_10_clear_error_count(adev); +} + +static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num) +{ + uint32_t t = 0; + + for (t = 0; t < ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table); t++) + if (channel_num == umc_v8_10_channelnum_map_colbit_table[t].channel_num) + return umc_v8_10_channelnum_map_colbit_table[t].col_bit; + + /* Failed to get col_bit. */ + return U32_MAX; +} + +/* + * Mapping normal address to soc physical address in swizzle mode. + */ +static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev, + uint32_t channel_idx, + uint64_t na, uint64_t *soc_pa) +{ + uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev); + uint32_t col_bit = umc_v8_10_get_col_bit(channel_num); + uint64_t tmp_addr; + + if (col_bit == U32_MAX) + return -1; + + tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx); + *soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_MID(na, col_bit) | + SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) | + SWIZZLE_MODE_ADDR_LSB(na); + + return 0; +} + +static void umc_v8_10_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t umc_reg_offset, + uint32_t node_inst, + uint32_t ch_inst, + uint32_t umc_inst) +{ + uint64_t mc_umc_status_addr; + uint64_t mc_umc_status, err_addr; + uint32_t channel_index; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); + + if (mc_umc_status == 0) + return; + + if (!err_data->err_addr) { + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + return; + } + + channel_index = + adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num * + adev->umc.channel_inst_num + + umc_inst * adev->umc.channel_inst_num + + ch_inst]; + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + uint32_t addr_lsb; + uint64_t mc_umc_addrt0; + + mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) 
* 4); + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + /* the lowest lsb bits should be ignored */ + addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb); + + err_addr &= ~((0x1ULL << addr_lsb) - 1); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { + uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); + uint64_t na_err_addr, retired_page_addr; + uint32_t col = 0; + int ret = 0; + + /* loop for all possibilities of [C6 C5] in normal address. */ + for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { + na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); + + /* Mapping normal error address to retired soc physical address. */ + ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, + na_err_addr, &retired_page_addr); + if (ret) { + dev_err(adev->dev, "Failed to map pa from umc na.\n"); + break; + } + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", + retired_page_addr); + amdgpu_umc_fill_error_record(err_data, na_err_addr, + retired_page_addr, channel_index, umc_inst); + } + } + } + + /* clear umc status */ + WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); +} + +static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_query_error_address(adev, + err_data, + umc_reg_offset, + node_inst, + ch_inst, + umc_inst); + } +} + +static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); + + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); + + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel, + GeccErrInt, 0x1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT); +} + +static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev) +{ + uint32_t node_inst = 0; + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { + umc_reg_offset = get_umc_v8_10_reg_offset(adev, + node_inst, + umc_inst, + ch_inst); + + umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset); + } +} + +const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { + .query_ras_error_count = umc_v8_10_query_ras_error_count, + .query_ras_error_address = umc_v8_10_query_ras_error_address, +}; + +struct amdgpu_umc_ras umc_v8_10_ras = { + .ras_block = { + .hw_ops = &umc_v8_10_ras_hw_ops, + }, + .err_cnt_init = umc_v8_10_err_cnt_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h new file mode 100644 index 0000000000000..849ede88e111b --- /dev/null +++ 
b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h @@ -0,0 +1,70 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V8_10_H__ +#define __UMC_V8_10_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* number of umc channel instance with memory map register access */ +#define UMC_V8_10_CHANNEL_INSTANCE_NUM 2 +/* number of umc instance with memory map register access */ +#define UMC_V8_10_UMC_INSTANCE_NUM 2 + +/* Total channel instances for all umc nodes */ +#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \ + (UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->umc.node_inst_num) + +/* UMC regiser per channel offset */ +#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400 + +/* EccErrCnt max value */ +#define UMC_V8_10_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UUMC_V8_10_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V8_10_CE_CNT_INIT (UMC_V8_10_CE_CNT_MAX - UUMC_V8_10_CE_INT_THRESHOLD) + +#define UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM 4 + +/* The C5 bit in NA address */ +#define UMC_V8_10_NA_C5_BIT 14 + +/* Map to swizzle mode address */ +#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) \ + ((((na) >> 10) * (ch_num) + (ch_idx)) << 10) +#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) \ + (((addr) >> ((col_bit) + 2)) << ((col_bit) + 2)) +#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit)) +#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \ + ((((addr) >> 10) & ((0x1ULL << (col_bit - 8)) - 1)) << 8) +#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF) + +extern struct amdgpu_umc_ras umc_v8_10_ras; +extern const uint32_t + umc_v8_10_channel_idx_tbl[] + [UMC_V8_10_UMC_INSTANCE_NUM] + [UMC_V8_10_CHANNEL_INSTANCE_NUM]; + +#endif + From 604d3a3f0dca0ef0ab0322f5e7af8c70c5ddb6d4 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Wed, 6 Jul 2022 15:13:55 +0530 Subject: [PATCH 005/134] drm/amdgpu: fix for coding style issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed below checkpatch warnings and errors drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:131: CHECK: Comparison to NULL could be written "apd" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:150: CHECK: Comparison to NULL could be written "apd" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:196: CHECK: Prefer kernel type 'u64' over 'uint64_t' drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:224: CHECK: Please don't use multiple blank lines 
drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:226: CHECK: Comparison to NULL could be written "!adev->acp.acp_genpd" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:233: CHECK: Please don't use multiple blank lines drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:239: CHECK: Alignment should match open parenthesis drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:241: CHECK: Comparison to NULL could be written "!adev->acp.acp_cell" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:247: CHECK: Comparison to NULL could be written "!adev->acp.acp_res" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:253: CHECK: Comparison to NULL could be written "!i2s_pdata" drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:350: CHECK: Alignment should match open parenthesis drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c:550: ERROR: that open brace { should be on the previous line Signed-off-by: Vijendar Mukunda Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 35 +++++++++---------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index cc9c9f8b23b2c..ceda19152c77e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -128,16 +128,14 @@ static int acp_poweroff(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to POWER GATE ACP block * smu will * 1. turn off the acp clock * 2. power off the acp tiles * 3. check and enter ulv state */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); - } + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); return 0; } @@ -147,16 +145,14 @@ static int acp_poweron(struct generic_pm_domain *genpd) struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); - if (apd != NULL) { - adev = apd->adev; + adev = apd->adev; /* call smu to UNGATE ACP block * smu will * 1. exit ulv * 2. turn on acp clock * 3. 
power on acp tiles */
-		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
-	}
+	amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
 	return 0;
 }
@@ -193,7 +189,7 @@ static int acp_genpd_remove_device(struct device *dev, void *data)
 static int acp_hw_init(void *handle)
 {
 	int r;
-	uint64_t acp_base;
+	u64 acp_base;
 	u32 val = 0;
 	u32 count = 0;
 	struct i2s_platform_data *i2s_pdata = NULL;
@@ -220,37 +216,32 @@ static int acp_hw_init(void *handle)
 		return -EINVAL;

 	acp_base = adev->rmmio_base;
-
-
 	adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
-	if (adev->acp.acp_genpd == NULL)
+	if (!adev->acp.acp_genpd)
 		return -ENOMEM;

 	adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
 	adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
 	adev->acp.acp_genpd->gpd.power_on = acp_poweron;
-
-
 	adev->acp.acp_genpd->adev = adev;

 	pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
-	adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
-			GFP_KERNEL);
+	adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), GFP_KERNEL);

-	if (adev->acp.acp_cell == NULL) {
+	if (!adev->acp.acp_cell) {
 		r = -ENOMEM;
 		goto failure;
 	}

 	adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
-	if (adev->acp.acp_res == NULL) {
+	if (!adev->acp.acp_res) {
 		r = -ENOMEM;
 		goto failure;
 	}

 	i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
-	if (i2s_pdata == NULL) {
+	if (!i2s_pdata) {
 		r = -ENOMEM;
 		goto failure;
 	}
@@ -346,8 +337,7 @@ static int acp_hw_init(void *handle)
 	adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
 	adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);

-	r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
-			ACP_DEVS);
+	r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS);
 	if (r)
 		goto failure;

@@ -546,8 +536,7 @@ static const struct amd_ip_funcs acp_ip_funcs = {
 	.set_powergating_state = acp_set_powergating_state,
 };

-const struct amdgpu_ip_block_version acp_ip_block =
-{
+const struct amdgpu_ip_block_version acp_ip_block = {
 	.type = AMD_IP_BLOCK_TYPE_ACP,
 	.major = 2,
 	.minor = 2,

From 748262eb400e809aa13e3485f4983c3db3d0ebb3 Mon Sep 17 00:00:00 2001
From: lin cao
Date: Wed, 13 Jul 2022 18:20:58 +0800
Subject: [PATCH 006/134] drm/amdgpu: Call trace info was found in dmesg when
 loading amdgpu

In the case of SRIOV, the register smnMp1_PMI_3_FIFO will get an invalid
value which will cause the "shift out of bound". In Ubuntu 22.04, this
issue will be checked and a related call trace will be reported in dmesg.
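To make the failure mode concrete, here is a minimal sketch; it is
illustrative only, with a hypothetical register name, and paraphrases
rather than quotes the actual STB init path:

	/* Illustrative sketch, not the literal sienna_cichlid STB code. */
	static void stb_init_sketch(struct smu_context *smu)
	{
		uint32_t fifo, size_field;

		if (amdgpu_sriov_vf(smu->adev))
			return;		/* the fix: VFs skip STB setup entirely */

		fifo = RREG32_PCIE(MP1_FIFO_REG);	/* hypothetical name; reads garbage on a VF */
		size_field = fifo & 0xff;		/* may be >= 32 when the read is invalid */
		smu->stb_context.stb_buf_size = 1 << size_field; /* UBSAN: shift out of bounds */
	}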
Signed-off-by: lin cao Reviewed-by: Jingwen Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index b71860e5324af..fa520d79ef67f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -886,6 +886,7 @@ static void sienna_cichlid_stb_init(struct smu_context *smu); static int sienna_cichlid_init_smc_tables(struct smu_context *smu) { + struct amdgpu_device *adev = smu->adev; int ret = 0; ret = sienna_cichlid_tables_init(smu); @@ -896,7 +897,8 @@ static int sienna_cichlid_init_smc_tables(struct smu_context *smu) if (ret) return ret; - sienna_cichlid_stb_init(smu); + if (!amdgpu_sriov_vf(adev)) + sienna_cichlid_stb_init(smu); return smu_v11_0_init_smc_tables(smu); } From 49062ee374f7a47443dac7e255bd419861de5b43 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 30 Jun 2022 07:56:50 +0530 Subject: [PATCH 007/134] drm/amdgpu: add dmi check for jadeite platform DMI check is required to distinguish Jadeite platform from Stoney base variant. Add DMI check logic for Jadeite platform. Signed-off-by: Vijendar Mukunda Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index ceda19152c77e..4c265ad198b8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include "amdgpu.h" #include "atom.h" @@ -36,6 +38,7 @@ #include "acp_gfx_if.h" +#define ST_JADEITE 1 #define ACP_TILE_ON_MASK 0x03 #define ACP_TILE_OFF_MASK 0x02 #define ACP_TILE_ON_RETAIN_REG_MASK 0x1f @@ -85,6 +88,8 @@ #define ACP_DEVS 4 #define ACP_SRC_ID 162 +static unsigned long acp_machine_id; + enum { ACP_TILE_P1 = 0, ACP_TILE_P2, @@ -180,6 +185,37 @@ static int acp_genpd_remove_device(struct device *dev, void *data) return 0; } +static int acp_quirk_cb(const struct dmi_system_id *id) +{ + acp_machine_id = ST_JADEITE; + return 1; +} + +static const struct dmi_system_id acp_quirk_table[] = { + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"), + } + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"), + }, + }, + { + .callback = acp_quirk_cb, + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"), + }, + }, + {} +}; + /** * acp_hw_init - start and test ACP block * From 4c33e5179ff1a09cdfba38fa4fd260ae6735d3bb Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 30 Jun 2022 07:57:14 +0530 Subject: [PATCH 008/134] drm/amdgpu: create I2S platform devices for Jadeite platform Jadeite platform uses I2S MICSP instance. Create platform devices for DMA controller and I2S controller for Jadeite platform. 
Signed-off-by: Vijendar Mukunda Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 293 +++++++++++++++--------- 1 file changed, 184 insertions(+), 109 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 4c265ad198b8d..bcc7ee02e0fcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -262,125 +262,200 @@ static int acp_hw_init(void *handle) adev->acp.acp_genpd->adev = adev; pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); + dmi_check_system(acp_quirk_table); + switch (acp_machine_id) { + case ST_JADEITE: + { + adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell), + GFP_KERNEL); + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), GFP_KERNEL); - - if (!adev->acp.acp_cell) { - r = -ENOMEM; - goto failure; - } - - adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); - if (!adev->acp.acp_res) { - r = -ENOMEM; - goto failure; - } + adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } - i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); - if (!i2s_pdata) { - r = -ENOMEM; - goto failure; - } + i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } - switch (adev->asic_type) { - case CHIP_STONEY: i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dma_irq"; + adev->acp.acp_res[2].flags = IORESOURCE_IRQ; + adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[2].end = adev->acp.acp_res[2].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 3; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2); + if (r) + goto failure; + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; break; - default: - i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; } - i2s_pdata[0].cap = DWC_I2S_PLAY; - i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; - i2s_pdata[0].i2s_reg_comp2 
= ACP_I2S_COMP2_PLAY_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1 | - DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; default: - i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | - DW_I2S_QUIRK_COMP_PARAM1; - } + adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), + GFP_KERNEL); - i2s_pdata[1].cap = DWC_I2S_RECORD; - i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; - i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + if (!adev->acp.acp_cell) { + r = -ENOMEM; + goto failure; + } - i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; - switch (adev->asic_type) { - case CHIP_STONEY: - i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; - break; - default: - break; - } + adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL); + if (!adev->acp.acp_res) { + r = -ENOMEM; + goto failure; + } + + i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL); + if (!i2s_pdata) { + r = -ENOMEM; + goto failure; + } + + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + } + i2s_pdata[0].cap = DWC_I2S_PLAY; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1 | + DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1; + } + + i2s_pdata[1].cap = DWC_I2S_RECORD; + i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; - i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; - i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; - i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; - i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; - - adev->acp.acp_res[0].name = "acp2x_dma"; - adev->acp.acp_res[0].flags = IORESOURCE_MEM; - adev->acp.acp_res[0].start = acp_base; - adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; - - adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; - adev->acp.acp_res[1].flags = IORESOURCE_MEM; - adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; - adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; - - adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; - adev->acp.acp_res[2].flags = IORESOURCE_MEM; - adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; - adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; - - adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; - adev->acp.acp_res[3].flags = IORESOURCE_MEM; - adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; - adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; - - adev->acp.acp_res[4].name = "acp2x_dma_irq"; - adev->acp.acp_res[4].flags = IORESOURCE_IRQ; - adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); - adev->acp.acp_res[4].end = adev->acp.acp_res[4].start; - - adev->acp.acp_cell[0].name = "acp_audio_dma"; - adev->acp.acp_cell[0].num_resources = 5; - adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; - 
adev->acp.acp_cell[0].platform_data = &adev->asic_type; - adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); - - adev->acp.acp_cell[1].name = "designware-i2s"; - adev->acp.acp_cell[1].num_resources = 1; - adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; - adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; - adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[2].name = "designware-i2s"; - adev->acp.acp_cell[2].num_resources = 1; - adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; - adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; - adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); - - adev->acp.acp_cell[3].name = "designware-i2s"; - adev->acp.acp_cell[3].num_resources = 1; - adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; - adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; - adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); - - r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); - if (r) - goto failure; - - r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, - acp_genpd_add_device); - if (r) - goto failure; + i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + break; + } + + i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD; + i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET; + i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET; + + i2s_pdata[3].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + switch (adev->asic_type) { + case CHIP_STONEY: + i2s_pdata[3].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE; + break; + default: + break; + } + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; + adev->acp.acp_res[2].flags = IORESOURCE_MEM; + adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap"; + adev->acp.acp_res[3].flags = IORESOURCE_MEM; + adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START; + adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END; + + adev->acp.acp_res[4].name = "acp2x_dma_irq"; + adev->acp.acp_res[4].flags = IORESOURCE_IRQ; + adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[4].end = adev->acp.acp_res[4].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 5; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + adev->acp.acp_cell[0].platform_data = &adev->asic_type; + adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type); + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[2].name = "designware-i2s"; + adev->acp.acp_cell[2].num_resources = 1; + 
adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; + adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; + adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[3].name = "designware-i2s"; + adev->acp.acp_cell[3].num_resources = 1; + adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3]; + adev->acp.acp_cell[3].platform_data = &i2s_pdata[2]; + adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data); + + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS); + if (r) + goto failure; + + r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd, + acp_genpd_add_device); + if (r) + goto failure; + } /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); From 912db6a58738e8be502838eb6a88f207ba356cd7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 11 Jul 2022 21:59:06 -0400 Subject: [PATCH 009/134] drm/amdgpu: use the same HDP flush registers for all nbio 7.4.x Align aldebaran with all other asics. One HDP bit per SDMA instance, aligned with firmware. This is effectively a revert of commit a0f9f8546668 ("drm/amdgpu/nbio7.4: don't use GPU_HDP_FLUSH bit 12"). On further discussions with the relevant hardware teams, re-align the bits for SDMA. Fixes: a0f9f8546668 ("drm/amdgpu/nbio7.4: don't use GPU_HDP_FLUSH bit 12") Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 +---- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 21 ------------------- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h | 1 - 3 files changed, 1 insertion(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 37234c2998d73..8fb7cf52c56df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2206,12 +2206,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(7, 4, 0): case IP_VERSION(7, 4, 1): - adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; - break; case IP_VERSION(7, 4, 4): adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; break; case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 4531761dcf77f..11848d1e238b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -339,27 +339,6 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = { - .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = 
GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { uint32_t baco_cntl; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index 7490022d79d4f..f27c417288224 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; extern struct amdgpu_nbio_ras nbio_v7_4_ras; From 98a90f1f0fdd112b85b16ef6ceee69f319ab9311 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 11 Jul 2022 22:04:56 -0400 Subject: [PATCH 010/134] drm/amdgpu: use the same HDP flush registers for all nbio 2.3.x Align RDNA2.x with other asics. One HDP bit per SDMA instance, aligned with firmware. This is effectively a revert of commit 369b7d04baf3 ("drm/amdgpu/nbio2.3: don't use GPU_HDP_FLUSH bit 12"). On further discussions with the relevant hardware teams, re-align the bits for SDMA. Fixes: 369b7d04baf3 ("drm/amdgpu/nbio2.3: don't use GPU_HDP_FLUSH bit 12") Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 +---- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 21 ------------------- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h | 1 - 3 files changed, 1 insertion(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 8fb7cf52c56df..0ba56e2ebf09b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2222,15 +2222,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(2, 3, 0): case IP_VERSION(2, 3, 1): case IP_VERSION(2, 3, 2): - adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - break; case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): case IP_VERSION(3, 3, 3): adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; break; case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 34c610b9157d0..b465baa267628 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -328,27 +328,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc = { - .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK, - 
.ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v2_3_init_registers(struct amdgpu_device *adev) { uint32_t def, data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h index 6074dd3a1ed8f..a43b60acf7f63 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc; extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; #endif From 958afce98c2c86732483458c03540d3c6ef45254 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 6 Jul 2022 11:10:15 -0400 Subject: [PATCH 011/134] drm/amdgpu: restore original stable pstate on ctx fini Save the original stable pstate on ctx init and restore it on ctx fini so that we restore a manually selected stable pstate on ctx exit. v2: fix init order (Alex) v3: don't add new variable to ctx struct (Evan) Fixes: c65b364c52ba ("drm/amdgpu/ctx: only reset stable pstate if the user changed it (v2)") Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 60 ++++++++++++++----------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 2ef5296216d64..8ee4e8491f391 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -272,32 +272,6 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) return res; } -static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, - struct drm_file *filp, struct amdgpu_ctx *ctx) -{ - int r; - - r = amdgpu_ctx_priority_permit(filp, priority); - if (r) - return r; - - memset(ctx, 0, sizeof(*ctx)); - - kref_init(&ctx->refcount); - ctx->mgr = mgr; - spin_lock_init(&ctx->ring_lock); - mutex_init(&ctx->lock); - - ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); - ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); - ctx->init_priority = priority; - ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; - ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; - - return 0; -} - static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, u32 *stable_pstate) { @@ -326,6 +300,38 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, return 0; } +static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, + struct drm_file *filp, struct amdgpu_ctx *ctx) +{ + u32 current_stable_pstate; + int r; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + + memset(ctx, 0, sizeof(*ctx)); + + kref_init(&ctx->refcount); + ctx->mgr = mgr; + spin_lock_init(&ctx->ring_lock); + mutex_init(&ctx->lock); + + ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); + ctx->reset_counter_query = ctx->reset_counter; + ctx->vram_lost_counter = 
atomic_read(&mgr->adev->vram_lost_counter);
+	ctx->init_priority = priority;
+	ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
+
+	r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
+	if (r)
+		return r;
+
+	ctx->stable_pstate = current_stable_pstate;
+
+	return 0;
+}
+
 static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
 					u32 stable_pstate)
 {
@@ -397,7 +403,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
 	}

 	if (drm_dev_enter(&adev->ddev, &idx)) {
-		amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
+		amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
 		drm_dev_exit(idx);
 	}

From 3876a8b5e241081b2a519f848a65c00d8e6cd124 Mon Sep 17 00:00:00 2001
From: Guenter Roeck
Date: Tue, 12 Jul 2022 15:42:47 -0700
Subject: [PATCH 012/134] drm/amd/display: Enable building new display engine with KCOV enabled

The new display engine uses floating point math, which is not supported
by KCOV. Commit 9d1d02ff3678 ("drm/amd/display: Don't build DCN1 when
kcov is enabled") tried to work around the problem by disabling
CONFIG_DRM_AMD_DC_DCN if KCOV_INSTRUMENT_ALL and KCOV_ENABLE_COMPARISONS
are enabled. The result is that KCOV cannot be enabled on systems which
require this display engine.

A much simpler and less invasive solution is to disable KCOV selectively
when compiling the display engine while keeping it enabled for the rest
of the kernel.

Fixes: 9d1d02ff3678 ("drm/amd/display: Don't build DCN1 when kcov is enabled")
Cc: Arnd Bergmann
Cc: Leo Li
Reviewed-by: Harry Wentland
Signed-off-by: Guenter Roeck
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/Kconfig     | 2 +-
 drivers/gpu/drm/amd/display/dc/Makefile | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index b4029c0d5d8c5..96cbc87f7b6b8 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,7 @@ config DRM_AMD_DC
 	bool "AMD DC - Enable new display engine"
 	default y
 	select SND_HDA_COMPONENT if SND_HDA_CORE
-	select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
+	select DRM_AMD_DC_DCN if (X86 || PPC64)
 	help
 	  Choose this option if you want to use the new display engine
 	  support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index 273f8f2c8e020..b9effadfc4bb7 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -25,6 +25,9 @@ DC_LIBS = basics bios dml clk_mgr dce gpio irq link virtual

 ifdef CONFIG_DRM_AMD_DC_DCN
+
+KCOV_INSTRUMENT := n
+
 DC_LIBS += dcn20
 DC_LIBS += dsc
 DC_LIBS += dcn10

From 606ee059f85a9aabb0242cbc72852572a3c40432 Mon Sep 17 00:00:00 2001
From: Colin Ian King
Date: Thu, 14 Jul 2022 11:34:27 +0100
Subject: [PATCH 013/134] drm/amd/display: Fix spelling mistake "supporing" -> "supporting"

There is a spelling mistake in a dml_print message. Fix it.
Signed-off-by: Colin Ian King
Signed-off-by: Alex Deucher
---
 .../gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index 6101c962ab0a2..fc4d7474c111e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -2994,7 +2994,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
 		if (v->ImmediateFlipSupportedForPipe[k] == false) {
 #ifdef __DML_VBA_DEBUG__
-			dml_print("DML::%s: Pipe %0d not supporing iflip\n", __func__, k);
+			dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
 #endif
 			v->ImmediateFlipSupported = false;
 		}

From 133dc89c640f965c2736e70d8dec0699d763850d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?=
Date: Thu, 14 Jul 2022 10:49:50 -0300
Subject: [PATCH 014/134] drm/amdgpu: Clarify asics naming in Kconfig options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Clarify which architectures these ASIC acronyms refer to.

Signed-off-by: André Almeida
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/Kconfig | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 74a8105fd2c03..7777d55275de8 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -4,7 +4,7 @@ config DRM_AMDGPU_SI
 	depends on DRM_AMDGPU
 	help
 	  Choose this option if you want to enable experimental support
-	  for SI asics.
+	  for SI (Southern Islands) asics.

 	  SI is already supported in radeon. Experimental support for SI
 	  in amdgpu will be disabled by default and is still provided by
@@ -16,7 +16,8 @@ config DRM_AMDGPU_CIK
 	bool "Enable amdgpu support for CIK parts"
 	depends on DRM_AMDGPU
 	help
-	  Choose this option if you want to enable support for CIK asics.
+	  Choose this option if you want to enable support for CIK (Sea
+	  Islands) asics.

 	  CIK is already supported in radeon. Support for CIK in amdgpu
 	  will be disabled by default and is still provided by radeon.

From 2d04559e0660cf503d4e46cfbd7421d0b6156aa1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Wed, 13 Jul 2022 14:33:37 +0300
Subject: [PATCH 015/134] drm/amd/display: Remove unnecessary NULL check in commit_planes_for_stream()

Smatch complains that:

    drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc.c:3369 commit_planes_for_stream()
    warn: variable dereferenced before check 'stream' (see line 3114)

The 'stream' pointer cannot be NULL and the check can be removed.
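For illustration (this sketch is not part of the patch, and all names in it are hypothetical), the pattern Smatch flags looks like this:

	struct link { int id; };
	struct stream { struct link *link; };

	static int example(struct stream *stream)
	{
		struct link *link = stream->link;	/* dereference: stream assumed non-NULL */

		/* ... much later, a now-dead NULL check of the same pointer ... */
		if (stream && link)
			return link->id;
		return 0;
	}

Since the early dereference would already have crashed on a NULL 'stream', the later check can never be the thing that saves us; either the dereference or the check has to go, and here the check is the redundant one.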
Signed-off-by: Dan Carpenter
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 51c9563ad1371..e42f44fc1c08d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -3478,7 +3478,7 @@ static void commit_planes_for_stream(struct dc *dc,
 				top_pipe_to_program->stream_res.tg,
 				CRTC_STATE_VACTIVE);

-		if (stream && should_use_dmub_lock(stream->link)) {
+		if (should_use_dmub_lock(stream->link)) {
 			union dmub_hw_lock_flags hw_locks = { 0 };
 			struct dmub_hw_lock_inst_flags inst_flags = { 0 };

From 50fe04d46aab77cf8fc3d48fbc38d872d181a849 Mon Sep 17 00:00:00 2001
From: Guchun Chen
Date: Thu, 14 Jul 2022 15:32:51 +0800
Subject: [PATCH 016/134] drm/amdgpu: introduce runtime pm mode

It can benefit code consistency in the future.

Suggested-by: Lijo Lazar
Signed-off-by: Guchun Chen
Reviewed-by: Lijo Lazar
Reviewed-by: Evan Quan
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 +++++-
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 9 +++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 6de63ea6687eb..fbbcdbdbe65a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -161,10 +161,12 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
 	if (amdgpu_device_supports_px(dev) &&
 	    (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */
 		adev->runpm = true;
+		adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
 		dev_info(adev->dev, "Using ATPX for runtime pm\n");
 	} else if (amdgpu_device_supports_boco(dev) &&
 		   (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */
 		adev->runpm = true;
+		adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
 		dev_info(adev->dev, "Using BOCO for runtime pm\n");
 	} else if (amdgpu_device_supports_baco(dev) &&
@@ -188,8 +190,10 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
 		amdgpu_runtime_pm_quirk(adev);

-		if (adev->runpm)
+		if (adev->runpm) {
+			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 			dev_info(adev->dev, "Using BACO for runtime pm\n");
+		}
 	}

 	/* Call ACPI methods: require modeset init
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 524fb09437e57..65624d091ed2d 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -45,6 +45,13 @@ enum amdgpu_int_thermal_type {
 	THERMAL_TYPE_KV,
 };

+enum amdgpu_runpm_mode {
+	AMDGPU_RUNPM_NONE,
+	AMDGPU_RUNPM_PX,
+	AMDGPU_RUNPM_BOCO,
+	AMDGPU_RUNPM_BACO,
+};
+
 struct amdgpu_ps {
 	u32 caps; /* vbios flags */
 	u32 class; /* vbios flags */
@@ -355,6 +362,8 @@ struct amdgpu_pm {
 	struct amdgpu_ctx       *stable_pstate_ctx;

 	struct config_table_setting config_table;
+	/* runtime mode */
+	enum amdgpu_runpm_mode rpm_mode;
 };

 int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors sensor,

From f746556aa9724d740c44861db0ac005ae50e1332 Mon Sep 17 00:00:00 2001
From: Guchun Chen
Date: Thu, 14 Jul 2022 15:55:37 +0800
Subject: [PATCH 017/134] drm/amdgpu: skip SMU FW reloading in runpm BACO case

SMU is always alive, so it's fine to skip SMU FW reloading when runpm
resumes from BACO; this avoids some race issues when resuming SMU.
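For illustration (a condensed view, not a replacement for the diff below): this is the first consumer of the rpm_mode enum introduced in the previous patch. The guard added to psp_load_smu_fw() boils down to:

	/* SMU never lost power across a BACO runtime suspend, so its
	 * firmware is still running and need not be reloaded.
	 */
	if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO))
		return 0;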
Suggested-by: Evan Quan
Signed-off-by: Guchun Chen
Reviewed-by: Lijo Lazar
Reviewed-by: Evan Quan
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index e9411c28d88ba..6540582ecbf82 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2348,6 +2348,13 @@ static int psp_load_smu_fw(struct psp_context *psp)
 			&adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
 	struct amdgpu_ras *ras = psp->ras_context.ras;

+	/*
+	 * Skip SMU FW reloading in case of using BACO for runpm only,
+	 * as SMU is always alive.
+	 */
+	if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO))
+		return 0;
+
 	if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
 		return 0;

From 75a9ad8c1b7960794c16182fd90852918ecdc0a6 Mon Sep 17 00:00:00 2001
From: Guchun Chen
Date: Thu, 14 Jul 2022 16:01:03 +0800
Subject: [PATCH 018/134] drm/amdgpu: drop runtime pm disablement quirk on several sienna cichlid cards

This quirk is no longer needed, as the issue is fixed by bypassing SMU
FW reloading in runtime resume.

Signed-off-by: Guchun Chen
Reviewed-by: Lijo Lazar
Reviewed-by: Evan Quan
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index fbbcdbdbe65a7..a74fa195b3790 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -43,17 +43,6 @@
 #include "amdgpu_display.h"
 #include "amdgpu_ras.h"

-static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev)
-{
-	/*
-	 * Add below quirk on several sienna_cichlid cards to disable
-	 * runtime pm to fix EMI failures.
-	 */
-	if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) ||
-	    ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF)))
-		adev->runpm = false;
-}
-
 void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
 {
 	struct amdgpu_gpu_instance *gpu_instance;
@@ -188,8 +177,6 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
 			break;
 		}

-		amdgpu_runtime_pm_quirk(adev);
-
 		if (adev->runpm) {
 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 			dev_info(adev->dev, "Using BACO for runtime pm\n");

From 9c913f38032e873758d1b13fce720a0c2cbbfcb7 Mon Sep 17 00:00:00 2001
From: Guchun Chen
Date: Thu, 14 Jul 2022 16:37:29 +0800
Subject: [PATCH 019/134] drm/amdgpu: drop runpm from amdgpu_device structure

It's redundant now that we switch to rpm_mode to indicate the runtime
power management mode.
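For illustration (a hedged before/after condensation of the conversion carried out in the diff below):

	/* before: a bare bool only said whether runtime pm was on */
	if (!adev->runpm)
		return -EBUSY;

	/* after: the enum also says which mechanism is in use */
	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
		return -EBUSY;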
Suggested-by: Lijo Lazar Signed-off-by: Guchun Chen Reviewed-by: Lijo Lazar Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 23 ++++++++++------------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2871a3e3801f8..3aa8ae1bc35af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1011,7 +1011,6 @@ struct amdgpu_device { uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; /* enable runtime pm on the device */ - bool runpm; bool in_runpm; bool has_pr3; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e3d1397081606..4c66aff11a400 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2121,7 +2121,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret) DRM_ERROR("Creating debugfs files failed (%d).\n", ret); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { /* only need to skip on ATPX */ if (amdgpu_device_supports_px(ddev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); @@ -2178,7 +2178,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) drm_dev_unplug(dev); - if (adev->runpm) { + if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { pm_runtime_get_sync(dev->dev); pm_runtime_forbid(dev->dev); } @@ -2461,7 +2461,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret, i; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } @@ -2530,7 +2530,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int ret; - if (!adev->runpm) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) return -EINVAL; /* Avoids registers access if device is physically gone */ @@ -2574,7 +2574,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) /* we don't want the main rpm_idle to call suspend - we want to autosuspend */ int ret = 1; - if (!adev->runpm) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); return -EBUSY; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index a74fa195b3790..b8ba59c93fc0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -147,14 +147,13 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) goto out; } + adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; if (amdgpu_device_supports_px(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */ adev->pm.rpm_mode = AMDGPU_RUNPM_PX; dev_info(adev->dev, "Using ATPX for runtime pm\n"); } else if (amdgpu_device_supports_boco(dev) && - (amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */ - adev->runpm = true; + (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; dev_info(adev->dev, "Using BOCO for runtime pm\n"); } else if (amdgpu_device_supports_baco(dev) && @@ -162,25 +161,23 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) switch (adev->asic_type) { case CHIP_VEGA20: case CHIP_ARCTURUS: - /* enable runpm if runpm=1 */ + 
/* enable BACO as runpm mode if runpm=1 */
 			if (amdgpu_runtime_pm > 0)
-				adev->runpm = true;
+				adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 			break;
 		case CHIP_VEGA10:
-			/* turn runpm on if noretry=0 */
+			/* enable BACO as runpm mode if noretry=0 */
 			if (!adev->gmc.noretry)
-				adev->runpm = true;
+				adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 			break;
 		default:
-			/* enable runpm on CI+ */
-			adev->runpm = true;
+			/* enable BACO as runpm mode on CI+ */
+			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
 			break;
 		}

-		if (adev->runpm) {
-			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+		if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)
 			dev_info(adev->dev, "Using BACO for runtime pm\n");
-		}
 	}

 	/* Call ACPI methods: require modeset init

From 5877b7ddbc2502a7ddbc07970cf5c15972c22de4 Mon Sep 17 00:00:00 2001
From: Hawking Zhang
Date: Tue, 31 Aug 2021 13:32:40 +0800
Subject: [PATCH 020/134] drm/amdgpu: correct the PSP_BL_CMD enum

To match the enum defined in the trusted OS.

Signed-off-by: Hawking Zhang
Reviewed-by: Le Ma
Reviewed-by: John Clements
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index e431f49949319..180634616b0fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -69,8 +69,8 @@ enum psp_bootloader_cmd {
 	PSP_BL__LOAD_SOSDRV		= 0x20000,
 	PSP_BL__LOAD_KEY_DATABASE	= 0x80000,
 	PSP_BL__LOAD_SOCDRV		= 0xB0000,
-	PSP_BL__LOAD_INTFDRV		= 0xC0000,
-	PSP_BL__LOAD_DBGDRV		= 0xD0000,
+	PSP_BL__LOAD_DBGDRV		= 0xC0000,
+	PSP_BL__LOAD_INTFDRV		= 0xD0000,
 	PSP_BL__DRAM_LONG_TRAIN		= 0x100000,
 	PSP_BL__DRAM_SHORT_TRAIN	= 0x200000,
 	PSP_BL__LOAD_TOS_SPL_TABLE	= 0x10000000,

From a53bc32182d7a2a934ca994239396fba21161c8e Mon Sep 17 00:00:00 2001
From: Kenneth Feng
Date: Sat, 16 Jul 2022 12:41:41 +0800
Subject: [PATCH 021/134] drm/amd/pm: enable mode1 reset for smu_v13_0_7

Enable mode1 reset for smu_v13_0_7, since it is missing.
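For illustration (a condensed pairing of the two hunks below, which only work together):

	/* soc21.c: select mode1 as the reset method for MP1 13.0.7 ... */
	case IP_VERSION(13, 0, 7):
		return AMD_RESET_METHOD_MODE1;

	/* ... smu_v13_0_7_ppt.c: and map the SMU message that actually triggers it */
	MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0),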
Signed-off-by: Kenneth Feng
Reviewed-by: Yang Wang
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/soc21.c                   | 1 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 765c3543ad183..00e9b7089febd 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -320,6 +320,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev)

 	switch (adev->ip_versions[MP1_HWIP][0]) {
 	case IP_VERSION(13, 0, 0):
+	case IP_VERSION(13, 0, 7):
 		return AMD_RESET_METHOD_MODE1;
 	case IP_VERSION(13, 0, 4):
 		return AMD_RESET_METHOD_MODE2;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 6259a85bc8185..6f05487145660 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -118,6 +118,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(DramLogSetDramSize,		PPSMC_MSG_DramLogSetDramSize,          0),
 	MSG_MAP(AllowGfxOff,			PPSMC_MSG_AllowGfxOff,                 0),
 	MSG_MAP(DisallowGfxOff,			PPSMC_MSG_DisallowGfxOff,              0),
+	MSG_MAP(Mode1Reset,			PPSMC_MSG_Mode1Reset,                  0),
 	MSG_MAP(PrepareMp1ForUnload,		PPSMC_MSG_PrepareMp1ForUnload,         0),
 };

From 4528c18605654bea022d294c9ba2e93600d05a94 Mon Sep 17 00:00:00 2001
From: Ruijing Dong
Date: Thu, 14 Jul 2022 17:13:37 -0400
Subject: [PATCH 022/134] drm/amdgpu: add comment to HW_IP_VCN_ENC type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support
both encoding and decoding jobs.

Link: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/245/commits
Reviewed-by: Christian König
Reviewed-by: Leo Liu
Signed-off-by: Ruijing Dong
Signed-off-by: Alex Deucher
---
 include/uapi/drm/amdgpu_drm.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 18d3246d636ef..63de71f531101 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -559,6 +559,10 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_HW_IP_VCE          4
 #define AMDGPU_HW_IP_UVD_ENC      5
 #define AMDGPU_HW_IP_VCN_DEC      6
+/*
+ * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support
+ * both encoding and decoding jobs.
+ */
 #define AMDGPU_HW_IP_VCN_ENC      7
 #define AMDGPU_HW_IP_VCN_JPEG     8
 #define AMDGPU_HW_IP_NUM          9

From 5df79aeb6e0890d6f5fd6e12c7ad238a1617b210 Mon Sep 17 00:00:00 2001
From: Luben Tuikov
Date: Wed, 20 Jul 2022 15:04:18 -0400
Subject: [PATCH 023/134] drm/amdgpu: Protect the amdgpu_bo_list list with a mutex v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Protect the struct amdgpu_bo_list with a mutex. This is used during command
submission in order to avoid buffer object corruption as recorded in
the link below.

v2 (chk): Keep the mutex locked for the whole CS to avoid using the
          list from multiple CS threads at the same time.
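For illustration (a hedged sketch of the resulting lock discipline; the diff below shows the real placement, including the error paths that unlock earlier):

	mutex_lock(&p->bo_list->bo_list_mutex);		/* in amdgpu_cs_parser_bos() */
	/* ... BO validation, VM updates, job submission ... */
	mutex_unlock(&p->bo_list->bo_list_mutex);	/* in amdgpu_cs_submit() */

Holding the mutex for the whole CS means a second thread submitting with the same BO list serializes behind the first instead of racing on the list contents.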
Suggested-by: Christian König Cc: Alex Deucher Cc: Andrey Grodzovsky Cc: Vitaly Prosyak Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2048 Signed-off-by: Luben Tuikov Signed-off-by: Christian König Tested-by: Luben Tuikov Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 16 +++++++++++++--- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 714178f1b6c6e..2168163aad2d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu) { struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list, rhead); - + mutex_destroy(&list->bo_list_mutex); kvfree(list); } @@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, trace_amdgpu_cs_bo_status(list->num_entries, total_size); + mutex_init(&list->bo_list_mutex); *result = list; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 529d52a204cf4..9caea1688fc32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -47,6 +47,10 @@ struct amdgpu_bo_list { struct amdgpu_bo *oa_obj; unsigned first_userptr; unsigned num_entries; + + /* Protect access during command submission. + */ + struct mutex bo_list_mutex; }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b28af04b0c3e9..d8f1335bc68f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + mutex_lock(&p->bo_list->bo_list_mutex); + /* One for TTM and one for the CS job */ amdgpu_bo_list_for_each_entry(e, p->bo_list) e->tv.num_shared = 2; @@ -651,6 +653,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, kvfree(e->user_pages); e->user_pages = NULL; } + mutex_unlock(&p->bo_list->bo_list_mutex); } return r; } @@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) + if (error && backoff) { ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); + mutex_unlock(&parser->bo_list->bo_list_mutex); + } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) + if (r) { + mutex_unlock(&p->bo_list->bo_list_mutex); return r; + } } r = amdgpu_vm_handle_moved(adev, vm); @@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); + mutex_unlock(&p->bo_list->bo_list_mutex); return 0; From 86bd6706c404ea703c11071d8b600d46bb98873c Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 17 May 2022 12:19:06 -0500 Subject: [PATCH 024/134] drm/amdgpu: remove acc_size from reserve/unreserve mem 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TTM used to track the "acc_size" of all BOs internally. We needed to keep track of it in our memory reservation to avoid TTM running out of memory in its own accounting. However, that "acc_size" accounting has since been removed from TTM. Therefore we don't really need to track it any more. Signed-off-by: Alex Sierra Reviewed-by: Philip Yang Reviewed-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 60 ++++++------------- 1 file changed, 17 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 581c7ae411020..d2d2b16c4d504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -115,21 +115,12 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14). */ -#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) - -static size_t amdgpu_amdkfd_acc_size(uint64_t size) -{ - size >>= PAGE_SHIFT; - size *= sizeof(dma_addr_t) + sizeof(void *); - return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) + - __roundup_pow_of_two(sizeof(struct ttm_tt)) + - PAGE_ALIGN(size); -} +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) /** * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size - * of buffer including any reserved for control structures + * of buffer. * * @adev: Device to which allocated BO belongs to * @size: Size of buffer, in bytes, encapsulated by B0. 
This should be
@@ -143,19 +134,16 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 {
 	uint64_t reserved_for_pt =
 		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
-	size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
+	size_t system_mem_needed, ttm_mem_needed, vram_needed;
 	int ret = 0;

-	acc_size = amdgpu_amdkfd_acc_size(size);
-
+	system_mem_needed = 0;
+	ttm_mem_needed = 0;
 	vram_needed = 0;
 	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
-		system_mem_needed = acc_size + size;
-		ttm_mem_needed = acc_size + size;
+		system_mem_needed = size;
+		ttm_mem_needed = size;
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-		system_mem_needed = acc_size;
-		ttm_mem_needed = acc_size;
-
 		/*
 		 * Conservatively round up the allocation requirement to 2 MB
 		 * to avoid fragmentation caused by 4K allocations in the tail
@@ -163,14 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 		 */
 		vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
-		system_mem_needed = acc_size + size;
-		ttm_mem_needed = acc_size;
-	} else if (alloc_flag &
-		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
-		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
-		system_mem_needed = acc_size;
-		ttm_mem_needed = acc_size;
-	} else {
+		system_mem_needed = size;
+	} else if (!(alloc_flag &
+		     (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+		      KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
 		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
 		return -ENOMEM;
 	}
@@ -208,28 +192,18 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 static void unreserve_mem_limit(struct amdgpu_device *adev,
 		uint64_t size, u32 alloc_flag)
 {
-	size_t acc_size;
-
-	acc_size = amdgpu_amdkfd_acc_size(size);
-
 	spin_lock(&kfd_mem_limit.mem_limit_lock);

 	if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
-		kfd_mem_limit.system_mem_used -= (acc_size + size);
-		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
+		kfd_mem_limit.system_mem_used -= size;
+		kfd_mem_limit.ttm_mem_used -= size;
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-		kfd_mem_limit.system_mem_used -= acc_size;
-		kfd_mem_limit.ttm_mem_used -= acc_size;
 		adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
 	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
-		kfd_mem_limit.system_mem_used -= (acc_size + size);
-		kfd_mem_limit.ttm_mem_used -= acc_size;
-	} else if (alloc_flag &
-		   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
-		    KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
-		kfd_mem_limit.system_mem_used -= acc_size;
-		kfd_mem_limit.ttm_mem_used -= acc_size;
-	} else {
+		kfd_mem_limit.system_mem_used -= size;
+	} else if (!(alloc_flag &
+		     (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+		      KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
 		pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
 		goto release;
 	}

From dc2b9c70ebca8e5661d33a15ed2b99d4510e90be Mon Sep 17 00:00:00 2001
From: Gavin Wan
Date: Mon, 18 Jul 2022 15:30:51 -0400
Subject: [PATCH 025/134] drm/amdgpu: fix scratch register access method in SRIOV
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The scratch register should be accessed through MMIO instead of RLCG
in SRIOV, since it is used inside the RLCG register access function
itself.
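For illustration (a condensed view of the fix in the diff below): the ring test computes the register offset once and then goes straight to MMIO, avoiding the RLCG path that itself relies on the scratch register:

	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);

	WREG32(scratch, 0xCAFEDEAD);	/* direct MMIO write */
	tmp = RREG32(scratch);		/* direct MMIO read */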
Fixes: d54762cc3e6a ("drm/amdgpu: nuke dynamic gfx scratch reg allocation")
Reviewed-by: Christian König
Signed-off-by: Gavin Wan
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5349ca4d19e38..c6e0f9313a7f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -987,23 +987,23 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
+	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
 	uint32_t tmp = 0;
 	unsigned i;
 	int r;

-	WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD);
+	WREG32(scratch, 0xCAFEDEAD);
 	r = amdgpu_ring_alloc(ring, 3);
 	if (r)
 		return r;

 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
-	amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) -
-			  PACKET3_SET_UCONFIG_REG_START);
+	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
 	amdgpu_ring_write(ring, 0xDEADBEEF);
 	amdgpu_ring_commit(ring);

 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0);
+		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
 			break;
 		udelay(1);

From 40835624efcde7f984cb859035b95b5a526d1a9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ADra=20Canal?=
Date: Thu, 14 Jul 2022 13:44:56 -0300
Subject: [PATCH 026/134] drm/amdgpu: Write masked value to control register
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the dce_v6_0 and dce_v8_0 hpd tear-down callbacks, the tmp variable
should be written into the control register instead of 0.
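For illustration (a condensed read-modify-write view of the fix; 'offset' here abbreviates hpd_offsets[amdgpu_connector->hpd.hpd] from the diff below):

	tmp = RREG32(mmDC_HPD1_CONTROL + offset);	/* read current control bits */
	tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;	/* clear only the enable bit */
	WREG32(mmDC_HPD1_CONTROL + offset, tmp);	/* previously wrote 0, clobbering every other field */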
Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 3caf6f386042f..77f5e998a1202 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -339,7 +339,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 7c75df5bffed3..802e5c753271c 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -333,7 +333,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev) tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]); tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK; - WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0); + WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } From fb3e8ed0438347c63650850c011c99deb59d7e62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Thu, 14 Jul 2022 13:44:57 -0300 Subject: [PATCH 027/134] drm/amd/display: Change get_pipe_idx function scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turn previously global function into a static function as it is not used outside the file. 
Reviewed-by: André Almeida
Signed-off-by: Maíra Canal
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 2 +-
 drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 39f93072b5e0e..c5a0a3649e9a2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -251,7 +251,7 @@ unsigned int get_total_surface_size_in_mall_bytes(
 	return size;
 }

-unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx)
+static unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx)
 {
 	int pipe_idx = -1;
 	int i;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 47b149d4bfcf9..6e61b53823619 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -165,7 +165,6 @@ unsigned int get_total_surface_size_in_mall_bytes(
 		struct display_mode_lib *mode_lib,
 		const display_e2e_pipe_params_st *pipes,
 		unsigned int num_pipes);
-unsigned int get_pipe_idx(struct display_mode_lib *mode_lib, unsigned int plane_idx);

 bool get_is_phantom_pipe(struct display_mode_lib *mode_lib,
 		const display_e2e_pipe_params_st *pipes,

From 615dc75fa6a7fc6cf029b01cdfc9d4b78919e71c Mon Sep 17 00:00:00 2001
From: Kenneth Feng
Date: Tue, 19 Jul 2022 21:15:29 +0800
Subject: [PATCH 028/134] drm/amd/pm: enable gfx ulv and gpo on smu_v13_0_7

Enable gfx ulv and gpo on smu_v13_0_7.

Signed-off-by: Kenneth Feng
Acked-by: Alex Deucher
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 6f05487145660..16eea2de8a2d4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -251,6 +251,7 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context *smu,
 	if (adev->pm.pp_feature & PP_SCLK_DPM_MASK) {
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT);
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_IMU_BIT);
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT);
 	}

 	if (adev->pm.pp_feature & PP_GFXOFF_MASK)
@@ -271,6 +272,9 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context *smu,
 	if (adev->pm.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
 		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_GFXCLK_BIT);

+	if (adev->pm.pp_feature & PP_ULV_MASK)
+		*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_ULV_BIT);
+
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DS_LCLK_BIT);
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_MP0CLK_BIT);
 	*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_MM_DPM_BIT);

From 8a9899c95d1cd709d441960ca325c6c8184978bb Mon Sep 17 00:00:00 2001
From: Wayne Lin
Date: Wed, 10 Mar 2021 23:40:01 +0800
Subject: [PATCH 029/134] drm/amd/display: Support vertical interrupt 0 for all dcn ASIC

[Why]
When CONFIG_DRM_AMD_SECURE_DISPLAY is enabled, it will try to register
vertical interrupt 0 for a specific task. Currently, only dcn10 has
defined the relevant info for vertical interrupt 0.
If we enable CONFIG_DRM_AMD_SECURE_DISPLAY for other dcn ASICs, we will
get DC_IRQ_SOURCE_INVALID while calling dc_interrupt_to_irq_source()
and cause pointer errors.

[How]
Add support of vertical interrupt 0 for all dcn ASICs.

Tested-by: Daniel Wheeler
Acked-by: Alan Liu
Signed-off-by: Wayne Lin
Signed-off-by: Alex Deucher
---
 .../display/dc/irq/dcn30/irq_service_dcn30.c  | 14 +++++++-------
 .../dc/irq/dcn303/irq_service_dcn303.c        | 19 +++++++++++++++++++
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
index 146cd18199128..2aa74ee1502a6 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c
@@ -289,6 +289,13 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
 		.funcs = &vblank_irq_info_funcs\
 	}

+#define dmub_trace_int_entry()\
+	[DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
+		IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX0_READY_INT_EN,\
+			DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\
+		.funcs = &dmub_trace_irq_info_funcs\
+	}
+
 #define vline0_int_entry(reg_num)\
 	[DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\
 		IRQ_REG_ENTRY(OTG, reg_num,\
@@ -297,13 +304,6 @@ static const struct irq_source_info_funcs vline0_irq_info_funcs = {
 		.funcs = &vline0_irq_info_funcs\
 	}

-#define dmub_trace_int_entry()\
-	[DC_IRQ_SOURCE_DMCUB_OUTBOX0] = {\
-		IRQ_REG_ENTRY_DMUB(DMCUB_INTERRUPT_ENABLE, DMCUB_OUTBOX0_READY_INT_EN,\
-			DMCUB_INTERRUPT_ACK, DMCUB_OUTBOX0_READY_INT_ACK),\
-		.funcs = &dmub_trace_irq_info_funcs\
-	}
-
 #define dummy_irq_entry() \
 	{\
 		.funcs = &dummy_irq_info_funcs\
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c
index 66e60762388e0..1d149d2901479 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn303/irq_service_dcn303.c
@@ -24,6 +24,10 @@ static enum dc_irq_source to_dal_irq_source_dcn303(struct irq_service *irq_servi
 		return DC_IRQ_SOURCE_VBLANK1;
 	case DCN_1_0__SRCID__DC_D2_OTG_VSTARTUP:
 		return DC_IRQ_SOURCE_VBLANK2;
+	case DCN_1_0__SRCID__OTG1_VERTICAL_INTERRUPT0_CONTROL:
+		return DC_IRQ_SOURCE_DC1_VLINE0;
+	case DCN_1_0__SRCID__OTG2_VERTICAL_INTERRUPT0_CONTROL:
+		return DC_IRQ_SOURCE_DC2_VLINE0;
 	case DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT:
 		return DC_IRQ_SOURCE_PFLIP1;
 	case DCN_1_0__SRCID__HUBP1_FLIP_INTERRUPT:
@@ -96,6 +100,11 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = {
 	.ack = NULL
 };

+static const struct irq_source_info_funcs vline0_irq_info_funcs = {
+	.set = NULL,
+	.ack = NULL
+};
+
 #undef BASE_INNER
 #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg

@@ -164,6 +173,14 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = {
 		.funcs = &vblank_irq_info_funcs\
 	}

+#define vline0_int_entry(reg_num)\
+	[DC_IRQ_SOURCE_DC1_VLINE0 + reg_num] = {\
+		IRQ_REG_ENTRY(OTG, reg_num,\
+			OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE,\
+			OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_CLEAR),\
+		.funcs = &vline0_irq_info_funcs\
+	}
+
 #define dummy_irq_entry() { .funcs = &dummy_irq_info_funcs }

 #define i2c_int_entry(reg_num) \
@@ -236,6 +253,8 @@ static const struct irq_source_info irq_source_info_dcn303[DAL_IRQ_SOURCES_NUMBE
 	vupdate_no_lock_int_entry(1),
 	vblank_int_entry(0),
 	vblank_int_entry(1),
+	vline0_int_entry(0),
+	vline0_int_entry(1),
 };
static const struct irq_service_funcs irq_service_funcs_dcn303 = { From 0bf95a1ebd48866ad1438454061782d4d47765f7 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Wed, 25 Sep 2019 09:45:49 -0400 Subject: [PATCH 030/134] drm/amd/display: Remove unused variable Remove an unused variable "remove_disconnect_edp" which was a workaround bit. Tested-by: Daniel Wheeler Acked-by: Alan Liu Signed-off-by: Jun Lei Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 7c42377f0aaeb..be41f9fcf1ddc 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -683,7 +683,6 @@ struct dc_debug_options { bool hdmi20_disable; bool skip_detection_link_training; uint32_t edid_read_retry_times; - bool remove_disconnect_edp; unsigned int force_odm_combine; //bit vector based on otg inst unsigned int seamless_boot_odm_combine; unsigned int force_odm_combine_4to1; //bit vector based on otg inst From 044b5cb9e8bfe4f006546fd98148e95489a6e803 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 27 Jul 2021 18:32:45 -0400 Subject: [PATCH 031/134] drm/amd/display: Update in dml Update DML to configure drr_display in vba struct. Tested-by: Daniel Wheeler Reviewed-by: Dmytro Laktyushkin Acked-by: Alan Liu Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 1 + drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 87c9b9f9976e5..e8b094006d95d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -499,6 +499,7 @@ struct _vcs_dpi_display_pipe_dest_params_st { unsigned int refresh_rate; bool synchronize_timings; unsigned int odm_combine_policy; + bool drr_display; }; struct _vcs_dpi_display_pipe_params_st { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index c5a0a3649e9a2..de78bb8489cb3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -697,6 +697,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib) mode_lib->vba.PixelClock[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz; mode_lib->vba.PixelClockBackEnd[mode_lib->vba.NumberOfActivePlanes] = dst->pixel_rate_mhz; mode_lib->vba.DPPCLK[mode_lib->vba.NumberOfActivePlanes] = clks->dppclk_mhz; + mode_lib->vba.DRRDisplay[mode_lib->vba.NumberOfActiveSurfaces] = dst->drr_display; if (ip->is_line_buffer_bpp_fixed) mode_lib->vba.LBBitPerPixel[mode_lib->vba.NumberOfActivePlanes] = ip->line_buffer_fixed_bpp; From aec4706b0e70e0cab2d0650b63cf2ddc1b154352 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Wed, 22 Jun 2022 10:59:01 +0800 Subject: [PATCH 032/134] drm/amd/display: Expose function reset_cur_dp_mst_topology [Why & How] Need to leverage this function out of dc_link.c. Change it to public. 
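For illustration (a hedged sketch of the intended caller; the next patch in this series wires exactly this up in the amdgpu_dm debugfs hotplug path):

	/* once public, dm code can tear down a root connector's topology: */
	if (aconnector->mst_mgr.mst_state == true)
		reset_cur_dp_mst_topology(link);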
Tested-by: Daniel Wheeler
Reviewed-by: Hersen Wu
Acked-by: Alan Liu
Signed-off-by: Wayne Lin
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +-
 drivers/gpu/drm/amd/display/dc/dc_link.h      | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 858ee51f930ae..ef54b96affa8a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -845,7 +845,7 @@ static bool discover_dp_mst_topology(struct dc_link *link, enum dc_detect_reason
 	return link->type == dc_connection_mst_branch;
 }

-static bool reset_cur_dp_mst_topology(struct dc_link *link)
+bool reset_cur_dp_mst_topology(struct dc_link *link)
 {
 	bool result = false;
 	DC_LOGGER_INIT(link->ctx->logger);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index 023774b94da34..a0af0f6afeef8 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -514,4 +514,7 @@ bool dc_dp_trace_is_logged(struct dc_link *link,
 struct dp_trace_lt_counts *dc_dp_trace_get_lt_counts(struct dc_link *link,
 						     bool in_detection);
 unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link);
+
+/* Destruct the mst topology of the link and reset the allocated payload table */
+bool reset_cur_dp_mst_topology(struct dc_link *link);
 #endif /* DC_LINK_H_ */

From c8a58ce18ca36b62749e326411176554462a5e2c Mon Sep 17 00:00:00 2001
From: Wayne Lin
Date: Tue, 17 Aug 2021 18:14:42 +0800
Subject: [PATCH 033/134] drm/amd/display: fix trigger_hotplug to support mst case

[Why & How]
Correct a few problems below so that the debugfs trigger_hotplug entry
supports the mst case:

* Adjust the place for acquiring the hpd_lock. We'll also access
  dc_link when simulating unplug
* When detecting that the connector is an mst root, call
  reset_cur_dp_mst_topology() to simulate unplug
* Don't support hotplug caused by a CSN message since we can't change
  mst topology info directly.
We can't simulate that * Clean up redundant code Tested-by: Daniel Wheeler Reviewed-by: Hersen Wu Acked-by: Alan Liu Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index a1f40d0cd41cf..e0ea350784e3a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1256,14 +1256,22 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, return -EINVAL; } + kfree(wr_buf); + if (param_nums <= 0) { DRM_DEBUG_DRIVER("user data not be read\n"); - kfree(wr_buf); + return -EINVAL; + } + + mutex_lock(&aconnector->hpd_lock); + + /* Don't support for mst end device*/ + if (aconnector->mst_port) { + mutex_unlock(&aconnector->hpd_lock); return -EINVAL; } if (param[0] == 1) { - mutex_lock(&aconnector->hpd_lock); if (!dc_link_detect_sink(aconnector->dc_link, &new_connection_type) && new_connection_type != dc_connection_none) @@ -1300,6 +1308,10 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, amdgpu_dm_update_connector_after_detect(aconnector); + /* If the aconnector is the root node in mst topology */ + if (aconnector->mst_mgr.mst_state == true) + reset_cur_dp_mst_topology(link); + drm_modeset_lock_all(dev); dm_restore_drm_connector_state(dev, connector); drm_modeset_unlock_all(dev); @@ -1310,7 +1322,6 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf, unlock: mutex_unlock(&aconnector->hpd_lock); - kfree(wr_buf); return size; } From 8b076fa7c5befd1d3e1d892ae466f5334e4c6c99 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Tue, 17 Aug 2021 18:58:31 +0800 Subject: [PATCH 034/134] drm/amd/display: Add is_mst_connector debugfs entry [Why & How] Add "is_mst_connector" debugfs entry to help distinguish whether a connector is in a mst topology or not. Access it with the following command: cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector Result: - "root" stands for the root connector of the topology - "branch" stands for branch device of the topology - "end" stands for leaf node connector of the topology - "no" stands for the connector is not a device of a mst topology Tested-by: Daniel Wheeler Reviewed-by: Hersen Wu Acked-by: Alan Liu Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index e0ea350784e3a..4eecf052d08d1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -2540,6 +2540,57 @@ static int target_backlight_show(struct seq_file *m, void *unused) return 0; } +/* + * function description: Determine if the connector is mst connector + * + * This function helps to determine whether a connector is a mst connector. 
+ * - "root" stands for the root connector of the topology + * - "branch" stands for branch device of the topology + * - "end" stands for leaf node connector of the topology + * - "no" stands for the connector is not a device of a mst topology + * Access it with the following command: + * + * cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector + * + */ +static int dp_is_mst_connector_show(struct seq_file *m, void *unused) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct drm_dp_mst_topology_mgr *mgr = NULL; + struct drm_dp_mst_port *port = NULL; + char *role = NULL; + + mutex_lock(&aconnector->hpd_lock); + + if (aconnector->mst_mgr.mst_state) { + role = "root"; + } else if (aconnector->mst_port && + aconnector->mst_port->mst_mgr.mst_state) { + + role = "end"; + + mgr = &aconnector->mst_port->mst_mgr; + port = aconnector->port; + + drm_modeset_lock(&mgr->base.lock, NULL); + if (port->pdt == DP_PEER_DEVICE_MST_BRANCHING && + port->mcs) + role = "branch"; + drm_modeset_unlock(&mgr->base.lock); + + } else { + role = "no"; + } + + seq_printf(m, "%s\n", role); + + mutex_unlock(&aconnector->hpd_lock); + + return 0; +} + + DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer); @@ -2549,6 +2600,7 @@ DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability); #endif DEFINE_SHOW_ATTRIBUTE(internal_display); DEFINE_SHOW_ATTRIBUTE(psr_capability); +DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2692,6 +2744,7 @@ static const struct { {"dp_dsc_fec_support", &dp_dsc_fec_support_fops}, {"max_bpc", &dp_max_bpc_debugfs_fops}, {"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops}, + {"is_mst_connector", &dp_is_mst_connector_fops} }; #ifdef CONFIG_DRM_AMD_DC_HDCP From 25f7cde8bad9fe5943851d3d602e9fddb7977961 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Wed, 20 Jul 2022 15:11:56 -0400 Subject: [PATCH 035/134] drm/amd/display: Add tags for indicating mst progress status [Why & How] In order to leverage igt tool to maintain mst feature, expose new debugfs entry "mst_progress_status". In our dm flow, record down the result of each phase of mst and user can examine the mst result by checking whether each phase get completed successfully. 
Tested-by: Daniel Wheeler Reviewed-by: Hersen Wu Acked-by: Alan Liu Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 20 ++++++++ .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 46 ++++++++++++++++++- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 18 +++++++- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 13 ++++++ 4 files changed, 94 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 7bd750e9f8913..90b306a1dd687 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -571,6 +571,14 @@ struct dsc_preferred_settings { bool dsc_force_disable_passthrough; }; +enum mst_progress_status { + MST_STATUS_DEFAULT = 0, + MST_PROBE = BIT(0), + MST_REMOTE_EDID = BIT(1), + MST_ALLOCATE_NEW_PAYLOAD = BIT(2), + MST_CLEAR_ALLOCATED_PAYLOAD = BIT(3), +}; + struct amdgpu_dm_connector { struct drm_connector base; @@ -623,8 +631,20 @@ struct amdgpu_dm_connector { struct drm_display_mode freesync_vid_base; int psr_skip_count; + + /* Record progress status of mst*/ + uint8_t mst_status; }; +static inline void amdgpu_dm_set_mst_status(uint8_t *status, + uint8_t flags, bool set) +{ + if (set) + *status |= flags; + else + *status &= ~flags; +} + #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base) extern const struct amdgpu_ip_block_version dm_ip_block; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 4eecf052d08d1..aa4edf1820956 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -50,6 +50,13 @@ struct dmub_debugfs_trace_entry { uint32_t param1; }; +static const char *const mst_progress_status[] = { + "probe", + "remote_edid", + "allocate_new_payload", + "clear_allocated_payload", +}; + /* parse_write_buffer_into_params - Helper function to parse debugfs write buffer into an array * * Function takes in attributes passed to debugfs write entry @@ -2590,6 +2597,41 @@ static int dp_is_mst_connector_show(struct seq_file *m, void *unused) return 0; } +/* + * function description: Read out the mst progress status + * + * This function helps to determine the mst progress status of + * a mst connector. + * + * Access it with the following command: + * + * cat /sys/kernel/debug/dri/0/DP-X/mst_progress_status + * + */ +static int dp_mst_progress_status_show(struct seq_file *m, void *unused) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + struct amdgpu_device *adev = drm_to_adev(connector->dev); + int i; + + mutex_lock(&aconnector->hpd_lock); + mutex_lock(&adev->dm.dc_lock); + + if (aconnector->mst_status == MST_STATUS_DEFAULT) { + seq_puts(m, "disabled\n"); + } else { + for (i = 0; i < sizeof(mst_progress_status)/sizeof(char *); i++) + seq_printf(m, "%s:%s\n", + mst_progress_status[i], + aconnector->mst_status & BIT(i) ? 
"done" : "not_done"); + } + + mutex_unlock(&adev->dm.dc_lock); + mutex_unlock(&aconnector->hpd_lock); + + return 0; +} DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support); DEFINE_SHOW_ATTRIBUTE(dmub_fw_state); @@ -2601,6 +2643,7 @@ DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability); DEFINE_SHOW_ATTRIBUTE(internal_display); DEFINE_SHOW_ATTRIBUTE(psr_capability); DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector); +DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status); static const struct file_operations dp_dsc_clock_en_debugfs_fops = { .owner = THIS_MODULE, @@ -2744,7 +2787,8 @@ static const struct { {"dp_dsc_fec_support", &dp_dsc_fec_support_fops}, {"max_bpc", &dp_max_bpc_debugfs_fops}, {"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops}, - {"is_mst_connector", &dp_is_mst_connector_fops} + {"is_mst_connector", &dp_is_mst_connector_fops}, + {"mst_progress_status", &dp_mst_progress_status_fops} }; #ifdef CONFIG_DRM_AMD_DC_HDCP diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 137645d40b72e..d66e3cd64ebd0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -312,6 +312,8 @@ bool dm_helpers_dp_mst_send_payload_allocation( struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_mgr *mst_mgr; struct drm_dp_mst_port *mst_port; + enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD; + enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD; aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; @@ -325,8 +327,20 @@ bool dm_helpers_dp_mst_send_payload_allocation( if (!mst_mgr->mst_state) return false; - /* It's OK for this to fail */ - drm_dp_update_payload_part2(mst_mgr); + if (!enable) { + set_flag = MST_CLEAR_ALLOCATED_PAYLOAD; + clr_flag = MST_ALLOCATE_NEW_PAYLOAD; + } + + if (drm_dp_update_payload_part2(mst_mgr)) { + amdgpu_dm_set_mst_status(&aconnector->mst_status, + set_flag, false); + } else { + amdgpu_dm_set_mst_status(&aconnector->mst_status, + set_flag, true); + amdgpu_dm_set_mst_status(&aconnector->mst_status, + clr_flag, false); + } if (!enable) drm_dp_mst_deallocate_vcpi(mst_mgr, mst_port); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 1f722309cfdd6..2e74ccf7df5bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -179,6 +179,8 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) aconnector->dc_sink = NULL; aconnector->edid = NULL; } + + aconnector->mst_status = MST_STATUS_DEFAULT; drm_modeset_unlock(&root->mst_mgr.base.lock); } @@ -279,6 +281,9 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port); if (!edid) { + amdgpu_dm_set_mst_status(&aconnector->mst_status, + MST_REMOTE_EDID, false); + drm_connector_update_edid_property( &aconnector->base, NULL); @@ -309,6 +314,8 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector) } aconnector->edid = edid; + amdgpu_dm_set_mst_status(&aconnector->mst_status, + MST_REMOTE_EDID, true); } if (aconnector->dc_sink && aconnector->dc_sink->sink_signal == SIGNAL_TYPE_VIRTUAL) { @@ -430,6 +437,10 @@ dm_dp_mst_detect(struct drm_connector *connector, dc_sink_release(aconnector->dc_sink); aconnector->dc_sink = NULL; aconnector->edid = NULL; + 
+		amdgpu_dm_set_mst_status(&aconnector->mst_status,
+			MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD,
+			false);
 	}
 
 	return connection_status;
@@ -526,6 +537,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 	connector = &aconnector->base;
 	aconnector->port = port;
 	aconnector->mst_port = master;
+	amdgpu_dm_set_mst_status(&aconnector->mst_status,
+			MST_PROBE, true);
 
 	if (drm_connector_init(
 		dev,

From 5d945cbcd4b16a29d6470a80dfb19738f9a4319f Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Wed, 20 Jul 2022 15:31:42 -0400
Subject: [PATCH 036/134] drm/amd/display: Create a file dedicated to planes

[Why]
The amdgpu_dm file contains most of the code that works as an interface
between the DRM API and DC. As a result, this file has become very
large, since it comprises multiple abstractions such as plane
manipulation.

[How]
This commit extracts the plane code into its own file named
amdgpu_dm_plane. This change does not alter anything inside the
functions; the only exception is converting some static functions into
global ones.

Tested-by: Daniel Wheeler
Reviewed-by: Harry Wentland
Acked-by: Alan Liu
Signed-off-by: Rodrigo Siqueira
Signed-off-by: Alex Deucher
---
 .../gpu/drm/amd/display/amdgpu_dm/Makefile    |    7 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2220 +++--------------
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.c   | 1645 ++++++++++++
 .../amd/display/amdgpu_dm/amdgpu_dm_plane.h   |   73 +
 4 files changed, 2103 insertions(+), 1842 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
 create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
index 718e123a32300..ec559ea902a39 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile
@@ -25,7 +25,12 @@
 
-AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o
+AMDGPUDM = \
+	amdgpu_dm.o \
+	amdgpu_dm_plane.o \
+	amdgpu_dm_irq.o \
+	amdgpu_dm_mst_types.o \
+	amdgpu_dm_color.o
 
 ifdef CONFIG_DRM_AMD_DC_DCN
 AMDGPUDM += dc_fpu.o
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 3e83fed540e88..8a1821a2551ca 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -46,6 +46,7 @@
 #include "amdgpu_ucode.h"
 #include "atom.h"
 #include "amdgpu_dm.h"
+#include "amdgpu_dm_plane.h"
 #ifdef CONFIG_DRM_AMD_DC_HDCP
 #include "amdgpu_dm_hdcp.h"
 #include
@@ -206,10 +207,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev);
 /* removes and deallocates the drm structures, created by the above function */
 static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm);
 
-static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
-				struct drm_plane *plane,
-				unsigned long possible_crtcs,
-				const struct dc_plane_cap *plane_cap);
 static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
 			       struct drm_plane *plane,
 			       uint32_t link_index);
@@ -228,12 +225,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
 static int amdgpu_dm_atomic_check(struct drm_device *dev,
 				  struct drm_atomic_state *state);
 
-static void handle_cursor_update(struct drm_plane *plane,
-				 struct drm_plane_state *old_plane_state);
-
-static const struct drm_format_info *
-amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd);
-
static void handle_hpd_irq_helper(struct amdgpu_dm_connector *aconnector); static void handle_hpd_rx_irq(void *param); @@ -4710,1147 +4701,265 @@ static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = { .destroy = amdgpu_dm_encoder_destroy, }; - -static void get_min_max_dc_plane_scaling(struct drm_device *dev, - struct drm_framebuffer *fb, - int *min_downscale, int *max_upscale) +static int +fill_plane_color_attributes(const struct drm_plane_state *plane_state, + const enum surface_pixel_format format, + enum dc_color_space *color_space) { - struct amdgpu_device *adev = drm_to_adev(dev); - struct dc *dc = adev->dm.dc; - /* Caps for all supported planes are the same on DCE and DCN 1 - 3 */ - struct dc_plane_cap *plane_cap = &dc->caps.planes[0]; + bool full_range; - switch (fb->format->format) { - case DRM_FORMAT_P010: - case DRM_FORMAT_NV12: - case DRM_FORMAT_NV21: - *max_upscale = plane_cap->max_upscale_factor.nv12; - *min_downscale = plane_cap->max_downscale_factor.nv12; + *color_space = COLOR_SPACE_SRGB; + + /* DRM color properties only affect non-RGB formats. */ + if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) + return 0; + + full_range = (plane_state->color_range == DRM_COLOR_YCBCR_FULL_RANGE); + + switch (plane_state->color_encoding) { + case DRM_COLOR_YCBCR_BT601: + if (full_range) + *color_space = COLOR_SPACE_YCBCR601; + else + *color_space = COLOR_SPACE_YCBCR601_LIMITED; break; - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_ARGB16161616F: - case DRM_FORMAT_XBGR16161616F: - case DRM_FORMAT_ABGR16161616F: - *max_upscale = plane_cap->max_upscale_factor.fp16; - *min_downscale = plane_cap->max_downscale_factor.fp16; + case DRM_COLOR_YCBCR_BT709: + if (full_range) + *color_space = COLOR_SPACE_YCBCR709; + else + *color_space = COLOR_SPACE_YCBCR709_LIMITED; break; - default: - *max_upscale = plane_cap->max_upscale_factor.argb8888; - *min_downscale = plane_cap->max_downscale_factor.argb8888; + case DRM_COLOR_YCBCR_BT2020: + if (full_range) + *color_space = COLOR_SPACE_2020_YCBCR; + else + return -EINVAL; break; - } - /* - * A factor of 1 in the plane_cap means to not allow scaling, ie. use a - * scaling factor of 1.0 == 1000 units. - */ - if (*max_upscale == 1) - *max_upscale = 1000; + default: + return -EINVAL; + } - if (*min_downscale == 1) - *min_downscale = 1000; + return 0; } - -static int fill_dc_scaling_info(struct amdgpu_device *adev, - const struct drm_plane_state *state, - struct dc_scaling_info *scaling_info) +static int +fill_dc_plane_info_and_addr(struct amdgpu_device *adev, + const struct drm_plane_state *plane_state, + const uint64_t tiling_flags, + struct dc_plane_info *plane_info, + struct dc_plane_address *address, + bool tmz_surface, + bool force_disable_dcc) { - int scale_w, scale_h, min_downscale, max_upscale; - - memset(scaling_info, 0, sizeof(*scaling_info)); + const struct drm_framebuffer *fb = plane_state->fb; + const struct amdgpu_framebuffer *afb = + to_amdgpu_framebuffer(plane_state->fb); + int ret; - /* Source is fixed 16.16 but we ignore mantissa for now... */ - scaling_info->src_rect.x = state->src_x >> 16; - scaling_info->src_rect.y = state->src_y >> 16; + memset(plane_info, 0, sizeof(*plane_info)); - /* - * For reasons we don't (yet) fully understand a non-zero - * src_y coordinate into an NV12 buffer can cause a - * system hang on DCN1x. - * To avoid hangs (and maybe be overly cautious) - * let's reject both non-zero src_x and src_y. 
- * - * We currently know of only one use-case to reproduce a - * scenario with non-zero src_x and src_y for NV12, which - * is to gesture the YouTube Android app into full screen - * on ChromeOS. - */ - if (((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) || - (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) && - (state->fb && state->fb->format->format == DRM_FORMAT_NV12 && - (scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0))) + switch (fb->format->format) { + case DRM_FORMAT_C8: + plane_info->format = + SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS; + break; + case DRM_FORMAT_RGB565: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_RGB565; + break; + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB8888; + break; + case DRM_FORMAT_XRGB2101010: + case DRM_FORMAT_ARGB2101010: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010; + break; + case DRM_FORMAT_XBGR2101010: + case DRM_FORMAT_ABGR2101010: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010; + break; + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR8888; + break; + case DRM_FORMAT_NV21: + plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr; + break; + case DRM_FORMAT_NV12: + plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb; + break; + case DRM_FORMAT_P010: + plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb; + break; + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F; + break; + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F; + break; + case DRM_FORMAT_XRGB16161616: + case DRM_FORMAT_ARGB16161616: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616; + break; + case DRM_FORMAT_XBGR16161616: + case DRM_FORMAT_ABGR16161616: + plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616; + break; + default: + DRM_ERROR( + "Unsupported screen format %p4cc\n", + &fb->format->format); return -EINVAL; + } - scaling_info->src_rect.width = state->src_w >> 16; - if (scaling_info->src_rect.width == 0) - return -EINVAL; + switch (plane_state->rotation & DRM_MODE_ROTATE_MASK) { + case DRM_MODE_ROTATE_0: + plane_info->rotation = ROTATION_ANGLE_0; + break; + case DRM_MODE_ROTATE_90: + plane_info->rotation = ROTATION_ANGLE_90; + break; + case DRM_MODE_ROTATE_180: + plane_info->rotation = ROTATION_ANGLE_180; + break; + case DRM_MODE_ROTATE_270: + plane_info->rotation = ROTATION_ANGLE_270; + break; + default: + plane_info->rotation = ROTATION_ANGLE_0; + break; + } - scaling_info->src_rect.height = state->src_h >> 16; - if (scaling_info->src_rect.height == 0) - return -EINVAL; - scaling_info->dst_rect.x = state->crtc_x; - scaling_info->dst_rect.y = state->crtc_y; + plane_info->visible = true; + plane_info->stereo_format = PLANE_STEREO_FORMAT_NONE; - if (state->crtc_w == 0) - return -EINVAL; + plane_info->layer_index = 0; - scaling_info->dst_rect.width = state->crtc_w; + ret = fill_plane_color_attributes(plane_state, plane_info->format, + &plane_info->color_space); + if (ret) + return ret; - if (state->crtc_h == 0) - return -EINVAL; + ret = fill_plane_buffer_attributes(adev, afb, plane_info->format, + plane_info->rotation, tiling_flags, + &plane_info->tiling_info, + &plane_info->plane_size, + &plane_info->dcc, address, + tmz_surface, force_disable_dcc); + if (ret) + return ret; - 
scaling_info->dst_rect.height = state->crtc_h; + fill_blending_from_plane_state( + plane_state, &plane_info->per_pixel_alpha, &plane_info->pre_multiplied_alpha, + &plane_info->global_alpha, &plane_info->global_alpha_value); - /* DRM doesn't specify clipping on destination output. */ - scaling_info->clip_rect = scaling_info->dst_rect; + return 0; +} - /* Validate scaling per-format with DC plane caps */ - if (state->plane && state->plane->dev && state->fb) { - get_min_max_dc_plane_scaling(state->plane->dev, state->fb, - &min_downscale, &max_upscale); - } else { - min_downscale = 250; - max_upscale = 16000; - } +static int fill_dc_plane_attributes(struct amdgpu_device *adev, + struct dc_plane_state *dc_plane_state, + struct drm_plane_state *plane_state, + struct drm_crtc_state *crtc_state) +{ + struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); + struct amdgpu_framebuffer *afb = (struct amdgpu_framebuffer *)plane_state->fb; + struct dc_scaling_info scaling_info; + struct dc_plane_info plane_info; + int ret; + bool force_disable_dcc = false; - scale_w = scaling_info->dst_rect.width * 1000 / - scaling_info->src_rect.width; + ret = fill_dc_scaling_info(adev, plane_state, &scaling_info); + if (ret) + return ret; - if (scale_w < min_downscale || scale_w > max_upscale) - return -EINVAL; + dc_plane_state->src_rect = scaling_info.src_rect; + dc_plane_state->dst_rect = scaling_info.dst_rect; + dc_plane_state->clip_rect = scaling_info.clip_rect; + dc_plane_state->scaling_quality = scaling_info.scaling_quality; - scale_h = scaling_info->dst_rect.height * 1000 / - scaling_info->src_rect.height; + force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend; + ret = fill_dc_plane_info_and_addr(adev, plane_state, + afb->tiling_flags, + &plane_info, + &dc_plane_state->address, + afb->tmz_surface, + force_disable_dcc); + if (ret) + return ret; - if (scale_h < min_downscale || scale_h > max_upscale) - return -EINVAL; + dc_plane_state->format = plane_info.format; + dc_plane_state->color_space = plane_info.color_space; + dc_plane_state->format = plane_info.format; + dc_plane_state->plane_size = plane_info.plane_size; + dc_plane_state->rotation = plane_info.rotation; + dc_plane_state->horizontal_mirror = plane_info.horizontal_mirror; + dc_plane_state->stereo_format = plane_info.stereo_format; + dc_plane_state->tiling_info = plane_info.tiling_info; + dc_plane_state->visible = plane_info.visible; + dc_plane_state->per_pixel_alpha = plane_info.per_pixel_alpha; + dc_plane_state->pre_multiplied_alpha = plane_info.pre_multiplied_alpha; + dc_plane_state->global_alpha = plane_info.global_alpha; + dc_plane_state->global_alpha_value = plane_info.global_alpha_value; + dc_plane_state->dcc = plane_info.dcc; + dc_plane_state->layer_index = plane_info.layer_index; // Always returns 0 + dc_plane_state->flip_int_enabled = true; /* - * The "scaling_quality" can be ignored for now, quality = 0 has DC - * assume reasonable defaults based on the format. + * Always set input transfer function, since plane state is refreshed + * every time. 
*/ + ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state); + if (ret) + return ret; return 0; } -static void -fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info, - uint64_t tiling_flags) -{ - /* Fill GFX8 params */ - if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == DC_ARRAY_2D_TILED_THIN1) { - unsigned int bankw, bankh, mtaspect, tile_split, num_banks; - - bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); - bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); - mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); - tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); - num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); - - /* XXX fix me for VI */ - tiling_info->gfx8.num_banks = num_banks; - tiling_info->gfx8.array_mode = - DC_ARRAY_2D_TILED_THIN1; - tiling_info->gfx8.tile_split = tile_split; - tiling_info->gfx8.bank_width = bankw; - tiling_info->gfx8.bank_height = bankh; - tiling_info->gfx8.tile_aspect = mtaspect; - tiling_info->gfx8.tile_mode = - DC_ADDR_SURF_MICRO_TILING_DISPLAY; - } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) - == DC_ARRAY_1D_TILED_THIN1) { - tiling_info->gfx8.array_mode = DC_ARRAY_1D_TILED_THIN1; - } - - tiling_info->gfx8.pipe_config = - AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); -} +/** + * fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates + * + * @plane: DRM plane containing dirty regions that need to be flushed to the eDP + * remote fb + * @old_plane_state: Old state of @plane + * @new_plane_state: New state of @plane + * @crtc_state: New state of CRTC connected to the @plane + * @flip_addrs: DC flip tracking struct, which also tracts dirty rects + * + * For PSR SU, DC informs the DMUB uController of dirty rectangle regions + * (referred to as "damage clips" in DRM nomenclature) that require updating on + * the eDP remote buffer. The responsibility of specifying the dirty regions is + * amdgpu_dm's. + * + * A damage-aware DRM client should fill the FB_DAMAGE_CLIPS property on the + * plane with regions that require flushing to the eDP remote buffer. In + * addition, certain use cases - such as cursor and multi-plane overlay (MPO) - + * implicitly provide damage clips without any client support via the plane + * bounds. + * + * Today, amdgpu_dm only supports the MPO and cursor usecase. 
+ * + * TODO: Also enable for FB_DAMAGE_CLIPS + */ +static void fill_dc_dirty_rects(struct drm_plane *plane, + struct drm_plane_state *old_plane_state, + struct drm_plane_state *new_plane_state, + struct drm_crtc_state *crtc_state, + struct dc_flip_addrs *flip_addrs) +{ + struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); + struct rect *dirty_rects = flip_addrs->dirty_rects; + uint32_t num_clips; + bool bb_changed; + bool fb_changed; + uint32_t i = 0; -static void -fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev, - union dc_tiling_info *tiling_info) -{ - tiling_info->gfx9.num_pipes = - adev->gfx.config.gb_addr_config_fields.num_pipes; - tiling_info->gfx9.num_banks = - adev->gfx.config.gb_addr_config_fields.num_banks; - tiling_info->gfx9.pipe_interleave = - adev->gfx.config.gb_addr_config_fields.pipe_interleave_size; - tiling_info->gfx9.num_shader_engines = - adev->gfx.config.gb_addr_config_fields.num_se; - tiling_info->gfx9.max_compressed_frags = - adev->gfx.config.gb_addr_config_fields.max_compress_frags; - tiling_info->gfx9.num_rb_per_se = - adev->gfx.config.gb_addr_config_fields.num_rb_per_se; - tiling_info->gfx9.shaderEnable = 1; - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) - tiling_info->gfx9.num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs; -} - -static int -validate_dcc(struct amdgpu_device *adev, - const enum surface_pixel_format format, - const enum dc_rotation_angle rotation, - const union dc_tiling_info *tiling_info, - const struct dc_plane_dcc_param *dcc, - const struct dc_plane_address *address, - const struct plane_size *plane_size) -{ - struct dc *dc = adev->dm.dc; - struct dc_dcc_surface_param input; - struct dc_surface_dcc_cap output; - - memset(&input, 0, sizeof(input)); - memset(&output, 0, sizeof(output)); - - if (!dcc->enable) - return 0; - - if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || - !dc->cap_funcs.get_dcc_compression_cap) - return -EINVAL; - - input.format = format; - input.surface_size.width = plane_size->surface_size.width; - input.surface_size.height = plane_size->surface_size.height; - input.swizzle_mode = tiling_info->gfx9.swizzle; - - if (rotation == ROTATION_ANGLE_0 || rotation == ROTATION_ANGLE_180) - input.scan = SCAN_DIRECTION_HORIZONTAL; - else if (rotation == ROTATION_ANGLE_90 || rotation == ROTATION_ANGLE_270) - input.scan = SCAN_DIRECTION_VERTICAL; - - if (!dc->cap_funcs.get_dcc_compression_cap(dc, &input, &output)) - return -EINVAL; - - if (!output.capable) - return -EINVAL; - - if (dcc->independent_64b_blks == 0 && - output.grph.rgb.independent_64b_blks != 0) - return -EINVAL; - - return 0; -} - -static bool -modifier_has_dcc(uint64_t modifier) -{ - return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier); -} - -static unsigned -modifier_gfx9_swizzle_mode(uint64_t modifier) -{ - if (modifier == DRM_FORMAT_MOD_LINEAR) - return 0; - - return AMD_FMT_MOD_GET(TILE, modifier); -} - -static const struct drm_format_info * -amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd) -{ - return amdgpu_lookup_format_info(cmd->pixel_format, cmd->modifier[0]); -} - -static void -fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev, - union dc_tiling_info *tiling_info, - uint64_t modifier) -{ - unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier); - unsigned int mod_pipe_xor_bits = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier); - unsigned int pkrs_log2 = AMD_FMT_MOD_GET(PACKERS, modifier); - unsigned int pipes_log2; - - pipes_log2 = min(5u, 
mod_pipe_xor_bits); - - fill_gfx9_tiling_info_from_device(adev, tiling_info); - - if (!IS_AMD_FMT_MOD(modifier)) - return; - - tiling_info->gfx9.num_pipes = 1u << pipes_log2; - tiling_info->gfx9.num_shader_engines = 1u << (mod_pipe_xor_bits - pipes_log2); - - if (adev->family >= AMDGPU_FAMILY_NV) { - tiling_info->gfx9.num_pkrs = 1u << pkrs_log2; - } else { - tiling_info->gfx9.num_banks = 1u << mod_bank_xor_bits; - - /* for DCC we know it isn't rb aligned, so rb_per_se doesn't matter. */ - } -} - -enum dm_micro_swizzle { - MICRO_SWIZZLE_Z = 0, - MICRO_SWIZZLE_S = 1, - MICRO_SWIZZLE_D = 2, - MICRO_SWIZZLE_R = 3 -}; - -static bool dm_plane_format_mod_supported(struct drm_plane *plane, - uint32_t format, - uint64_t modifier) -{ - struct amdgpu_device *adev = drm_to_adev(plane->dev); - const struct drm_format_info *info = drm_format_info(format); - int i; - - enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) & 3; - - if (!info) - return false; - - /* - * We always have to allow these modifiers: - * 1. Core DRM checks for LINEAR support if userspace does not provide modifiers. - * 2. Not passing any modifiers is the same as explicitly passing INVALID. - */ - if (modifier == DRM_FORMAT_MOD_LINEAR || - modifier == DRM_FORMAT_MOD_INVALID) { - return true; - } - - /* Check that the modifier is on the list of the plane's supported modifiers. */ - for (i = 0; i < plane->modifier_count; i++) { - if (modifier == plane->modifiers[i]) - break; - } - if (i == plane->modifier_count) - return false; - - /* - * For D swizzle the canonical modifier depends on the bpp, so check - * it here. - */ - if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 && - adev->family >= AMDGPU_FAMILY_NV) { - if (microtile == MICRO_SWIZZLE_D && info->cpp[0] == 4) - return false; - } - - if (adev->family >= AMDGPU_FAMILY_RV && microtile == MICRO_SWIZZLE_D && - info->cpp[0] < 8) - return false; - - if (modifier_has_dcc(modifier)) { - /* Per radeonsi comments 16/64 bpp are more complicated. */ - if (info->cpp[0] != 4) - return false; - /* We support multi-planar formats, but not when combined with - * additional DCC metadata planes. 
*/ - if (info->num_planes > 1) - return false; - } - - return true; -} - -static void -add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_t mod) -{ - if (!*mods) - return; - - if (*cap - *size < 1) { - uint64_t new_cap = *cap * 2; - uint64_t *new_mods = kmalloc(new_cap * sizeof(uint64_t), GFP_KERNEL); - - if (!new_mods) { - kfree(*mods); - *mods = NULL; - return; - } - - memcpy(new_mods, *mods, sizeof(uint64_t) * *size); - kfree(*mods); - *mods = new_mods; - *cap = new_cap; - } - - (*mods)[*size] = mod; - *size += 1; -} - -static void -add_gfx9_modifiers(const struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); - int pipe_xor_bits = min(8, pipes + - ilog2(adev->gfx.config.gb_addr_config_fields.num_se)); - int bank_xor_bits = min(8 - pipe_xor_bits, - ilog2(adev->gfx.config.gb_addr_config_fields.num_banks)); - int rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) + - ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se); - - - if (adev->family == AMDGPU_FAMILY_RV) { - /* Raven2 and later */ - bool has_constant_encode = adev->asic_type > CHIP_RAVEN || adev->external_rev_id >= 0x81; - - /* - * No _D DCC swizzles yet because we only allow 32bpp, which - * doesn't support _D on DCN - */ - - if (has_constant_encode) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1)); - } - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0)); - - if (has_constant_encode) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(RB, rb) | - AMD_FMT_MOD_SET(PIPE, pipes)); - } - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0) | - AMD_FMT_MOD_SET(RB, rb) | - AMD_FMT_MOD_SET(PIPE, pipes)); - } - - /* - * Only supported for 64bpp on Raven, will be filtered on format in - * dm_plane_format_mod_supported. 
- */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); - - if (adev->family == AMDGPU_FAMILY_RV) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); - } - - /* - * Only supported for 64bpp on Raven, will be filtered on format in - * dm_plane_format_mod_supported. - */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - - if (adev->family == AMDGPU_FAMILY_RV) { - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - } -} - -static void -add_gfx10_1_modifiers(const struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); - - - /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); -} - -static void -add_gfx10_3_modifiers(const struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); - int pkrs = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - 
AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs) | - AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_RETILE, 1) | - AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(PACKERS, pkrs)); - - /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); -} - -static void -add_gfx11_modifiers(struct amdgpu_device *adev, - uint64_t **mods, uint64_t *size, uint64_t *capacity) -{ - int num_pipes = 0; - int pipe_xor_bits = 0; - int num_pkrs = 0; - int pkrs = 0; - u32 gb_addr_config; - u8 i = 0; - unsigned swizzle_r_x; - uint64_t modifier_r_x; - uint64_t modifier_dcc_best; - uint64_t modifier_dcc_4k; - - /* TODO: GFX11 IP HW init hasnt finish and we get zero if we read from - * adev->gfx.config.gb_addr_config_fields.num_{pkrs,pipes} */ - gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); - ASSERT(gb_addr_config != 0); - - num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); - pkrs = ilog2(num_pkrs); - num_pipes = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES); - pipe_xor_bits = ilog2(num_pipes); - - for (i = 0; i < 2; i++) { - /* Insert the best one first. 
*/ - /* R_X swizzle modes are the best for rendering and DCC requires them. */ - if (num_pipes > 16) - swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX11_256K_R_X : AMD_FMT_MOD_TILE_GFX9_64K_R_X; - else - swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX9_64K_R_X : AMD_FMT_MOD_TILE_GFX11_256K_R_X; - - modifier_r_x = AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | - AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | - AMD_FMT_MOD_SET(TILE, swizzle_r_x) | - AMD_FMT_MOD_SET(PACKERS, pkrs); - - /* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */ - modifier_dcc_best = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B); - - /* DCC settings for 4K and greater resolutions. (required by display hw) */ - modifier_dcc_4k = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | - AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | - AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B); - - add_modifier(mods, size, capacity, modifier_dcc_best); - add_modifier(mods, size, capacity, modifier_dcc_4k); - - add_modifier(mods, size, capacity, modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1)); - add_modifier(mods, size, capacity, modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1)); - - add_modifier(mods, size, capacity, modifier_r_x); - } - - add_modifier(mods, size, capacity, AMD_FMT_MOD | - AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | - AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D)); -} - -static int -get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods) -{ - uint64_t size = 0, capacity = 128; - *mods = NULL; - - /* We have not hooked up any pre-GFX9 modifiers. */ - if (adev->family < AMDGPU_FAMILY_AI) - return 0; - - *mods = kmalloc(capacity * sizeof(uint64_t), GFP_KERNEL); - - if (plane_type == DRM_PLANE_TYPE_CURSOR) { - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID); - return *mods ? 0 : -ENOMEM; - } - - switch (adev->family) { - case AMDGPU_FAMILY_AI: - case AMDGPU_FAMILY_RV: - add_gfx9_modifiers(adev, mods, &size, &capacity); - break; - case AMDGPU_FAMILY_NV: - case AMDGPU_FAMILY_VGH: - case AMDGPU_FAMILY_YC: - case AMDGPU_FAMILY_GC_10_3_6: - case AMDGPU_FAMILY_GC_10_3_7: - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) - add_gfx10_3_modifiers(adev, mods, &size, &capacity); - else - add_gfx10_1_modifiers(adev, mods, &size, &capacity); - break; - case AMDGPU_FAMILY_GC_11_0_0: - case AMDGPU_FAMILY_GC_11_0_2: - add_gfx11_modifiers(adev, mods, &size, &capacity); - break; - } - - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); - - /* INVALID marks the end of the list. 
*/ - add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID); - - if (!*mods) - return -ENOMEM; - - return 0; -} - -static int -fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev, - const struct amdgpu_framebuffer *afb, - const enum surface_pixel_format format, - const enum dc_rotation_angle rotation, - const struct plane_size *plane_size, - union dc_tiling_info *tiling_info, - struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - const bool force_disable_dcc) -{ - const uint64_t modifier = afb->base.modifier; - int ret = 0; - - fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); - tiling_info->gfx9.swizzle = modifier_gfx9_swizzle_mode(modifier); - - if (modifier_has_dcc(modifier) && !force_disable_dcc) { - uint64_t dcc_address = afb->address + afb->base.offsets[1]; - bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); - bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier); - - dcc->enable = 1; - dcc->meta_pitch = afb->base.pitches[1]; - dcc->independent_64b_blks = independent_64b_blks; - if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) { - if (independent_64b_blks && independent_128b_blks) - dcc->dcc_ind_blk = hubp_ind_block_64b_no_128bcl; - else if (independent_128b_blks) - dcc->dcc_ind_blk = hubp_ind_block_128b; - else if (independent_64b_blks && !independent_128b_blks) - dcc->dcc_ind_blk = hubp_ind_block_64b; - else - dcc->dcc_ind_blk = hubp_ind_block_unconstrained; - } else { - if (independent_64b_blks) - dcc->dcc_ind_blk = hubp_ind_block_64b; - else - dcc->dcc_ind_blk = hubp_ind_block_unconstrained; - } - - address->grph.meta_addr.low_part = lower_32_bits(dcc_address); - address->grph.meta_addr.high_part = upper_32_bits(dcc_address); - } - - ret = validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size); - if (ret) - drm_dbg_kms(adev_to_drm(adev), "validate_dcc: returned error: %d\n", ret); - - return ret; -} - -static int -fill_plane_buffer_attributes(struct amdgpu_device *adev, - const struct amdgpu_framebuffer *afb, - const enum surface_pixel_format format, - const enum dc_rotation_angle rotation, - const uint64_t tiling_flags, - union dc_tiling_info *tiling_info, - struct plane_size *plane_size, - struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc) -{ - const struct drm_framebuffer *fb = &afb->base; - int ret; - - memset(tiling_info, 0, sizeof(*tiling_info)); - memset(plane_size, 0, sizeof(*plane_size)); - memset(dcc, 0, sizeof(*dcc)); - memset(address, 0, sizeof(*address)); - - address->tmz_surface = tmz_surface; - - if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { - uint64_t addr = afb->address + fb->offsets[0]; - - plane_size->surface_size.x = 0; - plane_size->surface_size.y = 0; - plane_size->surface_size.width = fb->width; - plane_size->surface_size.height = fb->height; - plane_size->surface_pitch = - fb->pitches[0] / fb->format->cpp[0]; - - address->type = PLN_ADDR_TYPE_GRAPHICS; - address->grph.addr.low_part = lower_32_bits(addr); - address->grph.addr.high_part = upper_32_bits(addr); - } else if (format < SURFACE_PIXEL_FORMAT_INVALID) { - uint64_t luma_addr = afb->address + fb->offsets[0]; - uint64_t chroma_addr = afb->address + fb->offsets[1]; - - plane_size->surface_size.x = 0; - plane_size->surface_size.y = 0; - plane_size->surface_size.width = fb->width; - plane_size->surface_size.height = fb->height; - plane_size->surface_pitch = - fb->pitches[0] / 
fb->format->cpp[0]; - - plane_size->chroma_size.x = 0; - plane_size->chroma_size.y = 0; - /* TODO: set these based on surface format */ - plane_size->chroma_size.width = fb->width / 2; - plane_size->chroma_size.height = fb->height / 2; - - plane_size->chroma_pitch = - fb->pitches[1] / fb->format->cpp[1]; - - address->type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE; - address->video_progressive.luma_addr.low_part = - lower_32_bits(luma_addr); - address->video_progressive.luma_addr.high_part = - upper_32_bits(luma_addr); - address->video_progressive.chroma_addr.low_part = - lower_32_bits(chroma_addr); - address->video_progressive.chroma_addr.high_part = - upper_32_bits(chroma_addr); - } - - if (adev->family >= AMDGPU_FAMILY_AI) { - ret = fill_gfx9_plane_attributes_from_modifiers(adev, afb, format, - rotation, plane_size, - tiling_info, dcc, - address, - force_disable_dcc); - if (ret) - return ret; - } else { - fill_gfx8_tiling_info_from_flags(tiling_info, tiling_flags); - } - - return 0; -} - -static void -fill_blending_from_plane_state(const struct drm_plane_state *plane_state, - bool *per_pixel_alpha, bool *pre_multiplied_alpha, - bool *global_alpha, int *global_alpha_value) -{ - *per_pixel_alpha = false; - *pre_multiplied_alpha = true; - *global_alpha = false; - *global_alpha_value = 0xff; - - if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY) - return; - - if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI || - plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) { - static const uint32_t alpha_formats[] = { - DRM_FORMAT_ARGB8888, - DRM_FORMAT_RGBA8888, - DRM_FORMAT_ABGR8888, - }; - uint32_t format = plane_state->fb->format->format; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(alpha_formats); ++i) { - if (format == alpha_formats[i]) { - *per_pixel_alpha = true; - break; - } - } - - if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) - *pre_multiplied_alpha = false; - } - - if (plane_state->alpha < 0xffff) { - *global_alpha = true; - *global_alpha_value = plane_state->alpha >> 8; - } -} - -static int -fill_plane_color_attributes(const struct drm_plane_state *plane_state, - const enum surface_pixel_format format, - enum dc_color_space *color_space) -{ - bool full_range; - - *color_space = COLOR_SPACE_SRGB; - - /* DRM color properties only affect non-RGB formats. 
*/ - if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) - return 0; - - full_range = (plane_state->color_range == DRM_COLOR_YCBCR_FULL_RANGE); - - switch (plane_state->color_encoding) { - case DRM_COLOR_YCBCR_BT601: - if (full_range) - *color_space = COLOR_SPACE_YCBCR601; - else - *color_space = COLOR_SPACE_YCBCR601_LIMITED; - break; - - case DRM_COLOR_YCBCR_BT709: - if (full_range) - *color_space = COLOR_SPACE_YCBCR709; - else - *color_space = COLOR_SPACE_YCBCR709_LIMITED; - break; - - case DRM_COLOR_YCBCR_BT2020: - if (full_range) - *color_space = COLOR_SPACE_2020_YCBCR; - else - return -EINVAL; - break; - - default: - return -EINVAL; - } - - return 0; -} - -static int -fill_dc_plane_info_and_addr(struct amdgpu_device *adev, - const struct drm_plane_state *plane_state, - const uint64_t tiling_flags, - struct dc_plane_info *plane_info, - struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc) -{ - const struct drm_framebuffer *fb = plane_state->fb; - const struct amdgpu_framebuffer *afb = - to_amdgpu_framebuffer(plane_state->fb); - int ret; - - memset(plane_info, 0, sizeof(*plane_info)); - - switch (fb->format->format) { - case DRM_FORMAT_C8: - plane_info->format = - SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS; - break; - case DRM_FORMAT_RGB565: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_RGB565; - break; - case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_ARGB8888: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB8888; - break; - case DRM_FORMAT_XRGB2101010: - case DRM_FORMAT_ARGB2101010: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010; - break; - case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_ABGR2101010: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010; - break; - case DRM_FORMAT_XBGR8888: - case DRM_FORMAT_ABGR8888: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR8888; - break; - case DRM_FORMAT_NV21: - plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr; - break; - case DRM_FORMAT_NV12: - plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb; - break; - case DRM_FORMAT_P010: - plane_info->format = SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb; - break; - case DRM_FORMAT_XRGB16161616F: - case DRM_FORMAT_ARGB16161616F: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F; - break; - case DRM_FORMAT_XBGR16161616F: - case DRM_FORMAT_ABGR16161616F: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F; - break; - case DRM_FORMAT_XRGB16161616: - case DRM_FORMAT_ARGB16161616: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616; - break; - case DRM_FORMAT_XBGR16161616: - case DRM_FORMAT_ABGR16161616: - plane_info->format = SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616; - break; - default: - DRM_ERROR( - "Unsupported screen format %p4cc\n", - &fb->format->format); - return -EINVAL; - } - - switch (plane_state->rotation & DRM_MODE_ROTATE_MASK) { - case DRM_MODE_ROTATE_0: - plane_info->rotation = ROTATION_ANGLE_0; - break; - case DRM_MODE_ROTATE_90: - plane_info->rotation = ROTATION_ANGLE_90; - break; - case DRM_MODE_ROTATE_180: - plane_info->rotation = ROTATION_ANGLE_180; - break; - case DRM_MODE_ROTATE_270: - plane_info->rotation = ROTATION_ANGLE_270; - break; - default: - plane_info->rotation = ROTATION_ANGLE_0; - break; - } - - plane_info->visible = true; - plane_info->stereo_format = PLANE_STEREO_FORMAT_NONE; - - plane_info->layer_index = 0; - - ret = fill_plane_color_attributes(plane_state, plane_info->format, - &plane_info->color_space); - if (ret) - return ret; - - ret = fill_plane_buffer_attributes(adev, 
afb, plane_info->format, - plane_info->rotation, tiling_flags, - &plane_info->tiling_info, - &plane_info->plane_size, - &plane_info->dcc, address, tmz_surface, - force_disable_dcc); - if (ret) - return ret; - - fill_blending_from_plane_state( - plane_state, &plane_info->per_pixel_alpha, &plane_info->pre_multiplied_alpha, - &plane_info->global_alpha, &plane_info->global_alpha_value); - - return 0; -} - -static int fill_dc_plane_attributes(struct amdgpu_device *adev, - struct dc_plane_state *dc_plane_state, - struct drm_plane_state *plane_state, - struct drm_crtc_state *crtc_state) -{ - struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); - struct amdgpu_framebuffer *afb = (struct amdgpu_framebuffer *)plane_state->fb; - struct dc_scaling_info scaling_info; - struct dc_plane_info plane_info; - int ret; - bool force_disable_dcc = false; - - ret = fill_dc_scaling_info(adev, plane_state, &scaling_info); - if (ret) - return ret; - - dc_plane_state->src_rect = scaling_info.src_rect; - dc_plane_state->dst_rect = scaling_info.dst_rect; - dc_plane_state->clip_rect = scaling_info.clip_rect; - dc_plane_state->scaling_quality = scaling_info.scaling_quality; - - force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend; - ret = fill_dc_plane_info_and_addr(adev, plane_state, - afb->tiling_flags, - &plane_info, - &dc_plane_state->address, - afb->tmz_surface, - force_disable_dcc); - if (ret) - return ret; - - dc_plane_state->format = plane_info.format; - dc_plane_state->color_space = plane_info.color_space; - dc_plane_state->format = plane_info.format; - dc_plane_state->plane_size = plane_info.plane_size; - dc_plane_state->rotation = plane_info.rotation; - dc_plane_state->horizontal_mirror = plane_info.horizontal_mirror; - dc_plane_state->stereo_format = plane_info.stereo_format; - dc_plane_state->tiling_info = plane_info.tiling_info; - dc_plane_state->visible = plane_info.visible; - dc_plane_state->per_pixel_alpha = plane_info.per_pixel_alpha; - dc_plane_state->pre_multiplied_alpha = plane_info.pre_multiplied_alpha; - dc_plane_state->global_alpha = plane_info.global_alpha; - dc_plane_state->global_alpha_value = plane_info.global_alpha_value; - dc_plane_state->dcc = plane_info.dcc; - dc_plane_state->layer_index = plane_info.layer_index; // Always returns 0 - dc_plane_state->flip_int_enabled = true; - - /* - * Always set input transfer function, since plane state is refreshed - * every time. - */ - ret = amdgpu_dm_update_plane_color_mgmt(dm_crtc_state, dc_plane_state); - if (ret) - return ret; - - return 0; -} - -/** - * fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates - * - * @plane: DRM plane containing dirty regions that need to be flushed to the eDP - * remote fb - * @old_plane_state: Old state of @plane - * @new_plane_state: New state of @plane - * @crtc_state: New state of CRTC connected to the @plane - * @flip_addrs: DC flip tracking struct, which also tracts dirty rects - * - * For PSR SU, DC informs the DMUB uController of dirty rectangle regions - * (referred to as "damage clips" in DRM nomenclature) that require updating on - * the eDP remote buffer. The responsibility of specifying the dirty regions is - * amdgpu_dm's. - * - * A damage-aware DRM client should fill the FB_DAMAGE_CLIPS property on the - * plane with regions that require flushing to the eDP remote buffer. In - * addition, certain use cases - such as cursor and multi-plane overlay (MPO) - - * implicitly provide damage clips without any client support via the plane - * bounds. 
- * - * Today, amdgpu_dm only supports the MPO and cursor usecase. - * - * TODO: Also enable for FB_DAMAGE_CLIPS - */ -static void fill_dc_dirty_rects(struct drm_plane *plane, - struct drm_plane_state *old_plane_state, - struct drm_plane_state *new_plane_state, - struct drm_crtc_state *crtc_state, - struct dc_flip_addrs *flip_addrs) -{ - struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); - struct rect *dirty_rects = flip_addrs->dirty_rects; - uint32_t num_clips; - bool bb_changed; - bool fb_changed; - uint32_t i = 0; - - flip_addrs->dirty_rect_count = 0; + flip_addrs->dirty_rect_count = 0; /* * Cursor plane has it's own dirty rect update interface. See @@ -6168,7 +5277,7 @@ static void fill_stream_properties_from_drm_display_mode( timing_out->scan_type = SCANNING_TYPE_NODATA; timing_out->hdmi_vic = 0; - if(old_stream) { + if (old_stream) { timing_out->vic = old_stream->timing.vic; timing_out->flags.HSYNC_POSITIVE_POLARITY = old_stream->timing.flags.HSYNC_POSITIVE_POLARITY; timing_out->flags.VSYNC_POSITIVE_POLARITY = old_stream->timing.flags.VSYNC_POSITIVE_POLARITY; @@ -6390,16 +5499,126 @@ static void dm_enable_per_frame_crtc_master_sync(struct dc_state *context) } } +/** + * DOC: FreeSync Video + * + * When a userspace application wants to play a video, the content follows a + * standard format definition that usually specifies the FPS for that format. + * The below list illustrates some video format and the expected FPS, + * respectively: + * + * - TV/NTSC (23.976 FPS) + * - Cinema (24 FPS) + * - TV/PAL (25 FPS) + * - TV/NTSC (29.97 FPS) + * - TV/NTSC (30 FPS) + * - Cinema HFR (48 FPS) + * - TV/PAL (50 FPS) + * - Commonly used (60 FPS) + * - Multiples of 24 (48,72,96 FPS) + * + * The list of standards video format is not huge and can be added to the + * connector modeset list beforehand. With that, userspace can leverage + * FreeSync to extends the front porch in order to attain the target refresh + * rate. Such a switch will happen seamlessly, without screen blanking or + * reprogramming of the output in any other way. If the userspace requests a + * modesetting change compatible with FreeSync modes that only differ in the + * refresh rate, DC will skip the full update and avoid blink during the + * transition. For example, the video player can change the modesetting from + * 60Hz to 30Hz for playing TV/NTSC content when it goes full screen without + * causing any display blink. This same concept can be applied to a mode + * setting change. + */ +static struct drm_display_mode * +get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, + bool use_probed_modes) +{ + struct drm_display_mode *m, *m_pref = NULL; + u16 current_refresh, highest_refresh; + struct list_head *list_head = use_probed_modes ? + &aconnector->base.probed_modes : + &aconnector->base.modes; + + if (aconnector->freesync_vid_base.clock != 0) + return &aconnector->freesync_vid_base; + + /* Find the preferred mode */ + list_for_each_entry (m, list_head, head) { + if (m->type & DRM_MODE_TYPE_PREFERRED) { + m_pref = m; + break; + } + } + + if (!m_pref) { + /* Probably an EDID with no preferred mode. Fallback to first entry */ + m_pref = list_first_entry_or_null( + &aconnector->base.modes, struct drm_display_mode, head); + if (!m_pref) { + DRM_DEBUG_DRIVER("No preferred mode found in EDID\n"); + return NULL; + } + } + + highest_refresh = drm_mode_vrefresh(m_pref); + + /* + * Find the mode with highest refresh rate with same resolution. 
+ * For some monitors, preferred mode is not the mode with highest + * supported refresh rate. + */ + list_for_each_entry (m, list_head, head) { + current_refresh = drm_mode_vrefresh(m); + + if (m->hdisplay == m_pref->hdisplay && + m->vdisplay == m_pref->vdisplay && + highest_refresh < current_refresh) { + highest_refresh = current_refresh; + m_pref = m; + } + } + + drm_mode_copy(&aconnector->freesync_vid_base, m_pref); + return m_pref; +} + +static bool is_freesync_video_mode(const struct drm_display_mode *mode, + struct amdgpu_dm_connector *aconnector) +{ + struct drm_display_mode *high_mode; + int timing_diff; + + high_mode = get_highest_refresh_rate_mode(aconnector, false); + if (!high_mode || !mode) + return false; + + timing_diff = high_mode->vtotal - mode->vtotal; + + if (high_mode->clock == 0 || high_mode->clock != mode->clock || + high_mode->hdisplay != mode->hdisplay || + high_mode->vdisplay != mode->vdisplay || + high_mode->hsync_start != mode->hsync_start || + high_mode->hsync_end != mode->hsync_end || + high_mode->htotal != mode->htotal || + high_mode->hskew != mode->hskew || + high_mode->vscan != mode->vscan || + high_mode->vsync_start - mode->vsync_start != timing_diff || + high_mode->vsync_end - mode->vsync_end != timing_diff) + return false; + else + return true; +} + #if defined(CONFIG_DRM_AMD_DC_DCN) static void update_dsc_caps(struct amdgpu_dm_connector *aconnector, - struct dc_sink *sink, struct dc_stream_state *stream, - struct dsc_dec_dpcd_caps *dsc_caps) + struct dc_sink *sink, struct dc_stream_state *stream, + struct dsc_dec_dpcd_caps *dsc_caps) { stream->timing.flags.DSC = 0; dsc_caps->is_dsc_supported = false; if (aconnector->dc_link && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || - sink->sink_signal == SIGNAL_TYPE_EDP)) { + sink->sink_signal == SIGNAL_TYPE_EDP)) { if (sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_NONE || sink->link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc, @@ -6409,6 +5628,7 @@ static void update_dsc_caps(struct amdgpu_dm_connector *aconnector, } } + static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector, struct dc_sink *sink, struct dc_stream_state *stream, struct dsc_dec_dpcd_caps *dsc_caps, @@ -6467,9 +5687,10 @@ static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector, } } + static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, - struct dc_sink *sink, struct dc_stream_state *stream, - struct dsc_dec_dpcd_caps *dsc_caps) + struct dc_sink *sink, struct dc_stream_state *stream, + struct dsc_dec_dpcd_caps *dsc_caps) { struct drm_connector *drm_connector = &aconnector->base; uint32_t link_bandwidth_kbps; @@ -6480,7 +5701,6 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, dc_link_get_link_cap(aconnector->dc_link)); - if (stream->link && stream->link->local_sink) max_dsc_target_bpp_limit_override = stream->link->local_sink->edid_caps.panel_patch.max_dsc_target_bpp_limit; @@ -6504,8 +5724,7 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, &stream->timing, &stream->timing.dsc_cfg)) { stream->timing.flags.DSC = 1; - DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", - __func__, drm_connector->name); + DRM_DEBUG_DRIVER("%s: [%s] DSC is selected from SST RX\n", __func__, drm_connector->name); } } else if (sink->link->dpcd_caps.dongle_type == 
DISPLAY_DONGLE_DP_HDMI_CONVERTER) { timing_bw_in_kbps = dc_bandwidth_in_kbps_from_timing(&stream->timing); @@ -6536,123 +5755,13 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector, if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_num_slices_h) stream->timing.dsc_cfg.num_slices_h = aconnector->dsc_settings.dsc_num_slices_h; - if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_num_slices_v) - stream->timing.dsc_cfg.num_slices_v = aconnector->dsc_settings.dsc_num_slices_v; - - if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_bits_per_pixel) - stream->timing.dsc_cfg.bits_per_pixel = aconnector->dsc_settings.dsc_bits_per_pixel; -} -#endif /* CONFIG_DRM_AMD_DC_DCN */ - -/** - * DOC: FreeSync Video - * - * When a userspace application wants to play a video, the content follows a - * standard format definition that usually specifies the FPS for that format. - * The below list illustrates some video format and the expected FPS, - * respectively: - * - * - TV/NTSC (23.976 FPS) - * - Cinema (24 FPS) - * - TV/PAL (25 FPS) - * - TV/NTSC (29.97 FPS) - * - TV/NTSC (30 FPS) - * - Cinema HFR (48 FPS) - * - TV/PAL (50 FPS) - * - Commonly used (60 FPS) - * - Multiples of 24 (48,72,96,120 FPS) - * - * The list of standards video format is not huge and can be added to the - * connector modeset list beforehand. With that, userspace can leverage - * FreeSync to extends the front porch in order to attain the target refresh - * rate. Such a switch will happen seamlessly, without screen blanking or - * reprogramming of the output in any other way. If the userspace requests a - * modesetting change compatible with FreeSync modes that only differ in the - * refresh rate, DC will skip the full update and avoid blink during the - * transition. For example, the video player can change the modesetting from - * 60Hz to 30Hz for playing TV/NTSC content when it goes full screen without - * causing any display blink. This same concept can be applied to a mode - * setting change. - */ -static struct drm_display_mode * -get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector, - bool use_probed_modes) -{ - struct drm_display_mode *m, *m_pref = NULL; - u16 current_refresh, highest_refresh; - struct list_head *list_head = use_probed_modes ? - &aconnector->base.probed_modes : - &aconnector->base.modes; - - if (aconnector->freesync_vid_base.clock != 0) - return &aconnector->freesync_vid_base; - - /* Find the preferred mode */ - list_for_each_entry (m, list_head, head) { - if (m->type & DRM_MODE_TYPE_PREFERRED) { - m_pref = m; - break; - } - } - - if (!m_pref) { - /* Probably an EDID with no preferred mode. Fallback to first entry */ - m_pref = list_first_entry_or_null( - &aconnector->base.modes, struct drm_display_mode, head); - if (!m_pref) { - DRM_DEBUG_DRIVER("No preferred mode found in EDID\n"); - return NULL; - } - } - - highest_refresh = drm_mode_vrefresh(m_pref); - - /* - * Find the mode with highest refresh rate with same resolution. - * For some monitors, preferred mode is not the mode with highest - * supported refresh rate. 
- */ - list_for_each_entry (m, list_head, head) { - current_refresh = drm_mode_vrefresh(m); - - if (m->hdisplay == m_pref->hdisplay && - m->vdisplay == m_pref->vdisplay && - highest_refresh < current_refresh) { - highest_refresh = current_refresh; - m_pref = m; - } - } - - drm_mode_copy(&aconnector->freesync_vid_base, m_pref); - return m_pref; -} - -static bool is_freesync_video_mode(const struct drm_display_mode *mode, - struct amdgpu_dm_connector *aconnector) -{ - struct drm_display_mode *high_mode; - int timing_diff; - - high_mode = get_highest_refresh_rate_mode(aconnector, false); - if (!high_mode || !mode) - return false; - - timing_diff = high_mode->vtotal - mode->vtotal; + if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_num_slices_v) + stream->timing.dsc_cfg.num_slices_v = aconnector->dsc_settings.dsc_num_slices_v; - if (high_mode->clock == 0 || high_mode->clock != mode->clock || - high_mode->hdisplay != mode->hdisplay || - high_mode->vdisplay != mode->vdisplay || - high_mode->hsync_start != mode->hsync_start || - high_mode->hsync_end != mode->hsync_end || - high_mode->htotal != mode->htotal || - high_mode->hskew != mode->hskew || - high_mode->vscan != mode->vscan || - high_mode->vsync_start - mode->vsync_start != timing_diff || - high_mode->vsync_end - mode->vsync_end != timing_diff) - return false; - else - return true; + if (stream->timing.flags.DSC && aconnector->dsc_settings.dsc_bits_per_pixel) + stream->timing.dsc_cfg.bits_per_pixel = aconnector->dsc_settings.dsc_bits_per_pixel; } +#endif /* CONFIG_DRM_AMD_DC_DCN */ static struct dc_stream_state * create_stream_for_sink(struct amdgpu_dm_connector *aconnector, @@ -6677,6 +5786,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, #if defined(CONFIG_DRM_AMD_DC_DCN) struct dsc_dec_dpcd_caps dsc_caps; #endif + struct dc_sink *sink = NULL; memset(&saved_mode, 0, sizeof(saved_mode)); @@ -6740,7 +5850,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, drm_mode_copy(&mode, freesync_mode); } else { decide_crtc_timing_for_drm_display_mode( - &mode, preferred_mode, scale); + &mode, preferred_mode, scale); preferred_refresh = drm_mode_vrefresh(preferred_mode); } @@ -6751,7 +5861,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, else if (!dm_state) drm_mode_set_crtcinfo(&mode, 0); - /* + /* * If scaling is enabled and refresh rate didn't change * we copy the vic and polarities of the old timings */ @@ -6999,7 +6109,8 @@ amdgpu_dm_connector_detect(struct drm_connector *connector, bool force) !aconnector->fake_enable) connected = (aconnector->dc_sink != NULL); else - connected = (aconnector->base.force == DRM_FORCE_ON); + connected = (aconnector->base.force == DRM_FORCE_ON || + aconnector->base.force == DRM_FORCE_ON_DIGITAL); update_subconnector_property(aconnector); @@ -7123,18 +6234,21 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector) int i; /* - * Call only if mst_mgr was iniitalized before since it's not done + * Call only if mst_mgr was initialized before since it's not done * for all connector types. 
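	 * (only DP connectors initialize an MST topology manager).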
*/ if (aconnector->mst_mgr.dev) drm_dp_mst_topology_mgr_destroy(&aconnector->mst_mgr); +#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\ + defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) for (i = 0; i < dm->num_of_edps; i++) { if ((link == dm->backlight_link[i]) && dm->backlight_dev[i]) { backlight_device_unregister(dm->backlight_dev[i]); dm->backlight_dev[i] = NULL; } } +#endif if (aconnector->dc_em_sink) dc_sink_release(aconnector->dc_em_sink); @@ -7175,6 +6289,7 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector) state->base.max_requested_bpc = 8; state->vcpi_slots = 0; state->pbn = 0; + if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) state->abm_level = amdgpu_dm_abm_level; @@ -7564,10 +6679,10 @@ static void dm_update_crtc_active_planes(struct drm_crtc *crtc, } static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, - struct drm_atomic_state *state) + struct drm_atomic_state *state) { struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, - crtc); + crtc); struct amdgpu_device *adev = drm_to_adev(crtc->dev); struct dc *dc = adev->dm.dc; struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); @@ -7578,7 +6693,7 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, dm_update_crtc_active_planes(crtc, crtc_state); if (WARN_ON(unlikely(!dm_crtc_state->stream && - modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) { + modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) { return ret; } @@ -7589,7 +6704,7 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, * userspace which stops using the HW cursor altogether in response to the resulting EINVAL. */ if (crtc_state->enable && - !(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) { + !(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) { DRM_DEBUG_ATOMIC("Can't enable a CRTC without enabling the primary plane\n"); return -EINVAL; } @@ -7627,21 +6742,21 @@ static void dm_encoder_helper_disable(struct drm_encoder *encoder) int convert_dc_color_depth_into_bpc(enum dc_color_depth display_color_depth) { switch (display_color_depth) { - case COLOR_DEPTH_666: - return 6; - case COLOR_DEPTH_888: - return 8; - case COLOR_DEPTH_101010: - return 10; - case COLOR_DEPTH_121212: - return 12; - case COLOR_DEPTH_141414: - return 14; - case COLOR_DEPTH_161616: - return 16; - default: - break; - } + case COLOR_DEPTH_666: + return 6; + case COLOR_DEPTH_888: + return 8; + case COLOR_DEPTH_101010: + return 10; + case COLOR_DEPTH_121212: + return 12; + case COLOR_DEPTH_141414: + return 14; + case COLOR_DEPTH_161616: + return 16; + default: + break; + } return 0; } @@ -7672,7 +6787,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, if (!state->duplicated) { int max_bpc = conn_state->max_requested_bpc; is_y420 = drm_mode_is_420_also(&connector->display_info, adjusted_mode) && - aconnector->force_yuv420_output; + aconnector->force_yuv420_output; color_depth = convert_color_depth_from_display_info(connector, is_y420, max_bpc); @@ -7727,7 +6842,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, if (!stream) continue; - if ((struct amdgpu_dm_connector*)stream->dm_stream_context == aconnector) + if ((struct amdgpu_dm_connector *)stream->dm_stream_context == aconnector) break; stream = NULL; @@ -7776,475 +6891,6 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, } #endif -static void dm_drm_plane_reset(struct drm_plane *plane) -{ - struct dm_plane_state *amdgpu_state = NULL; - 
- if (plane->state) - plane->funcs->atomic_destroy_state(plane, plane->state); - - amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); - WARN_ON(amdgpu_state == NULL); - - if (amdgpu_state) - __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); -} - -static struct drm_plane_state * -dm_drm_plane_duplicate_state(struct drm_plane *plane) -{ - struct dm_plane_state *dm_plane_state, *old_dm_plane_state; - - old_dm_plane_state = to_dm_plane_state(plane->state); - dm_plane_state = kzalloc(sizeof(*dm_plane_state), GFP_KERNEL); - if (!dm_plane_state) - return NULL; - - __drm_atomic_helper_plane_duplicate_state(plane, &dm_plane_state->base); - - if (old_dm_plane_state->dc_state) { - dm_plane_state->dc_state = old_dm_plane_state->dc_state; - dc_plane_state_retain(dm_plane_state->dc_state); - } - - return &dm_plane_state->base; -} - -static void dm_drm_plane_destroy_state(struct drm_plane *plane, - struct drm_plane_state *state) -{ - struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); - - if (dm_plane_state->dc_state) - dc_plane_state_release(dm_plane_state->dc_state); - - drm_atomic_helper_plane_destroy_state(plane, state); -} - -static const struct drm_plane_funcs dm_plane_funcs = { - .update_plane = drm_atomic_helper_update_plane, - .disable_plane = drm_atomic_helper_disable_plane, - .destroy = drm_primary_helper_destroy, - .reset = dm_drm_plane_reset, - .atomic_duplicate_state = dm_drm_plane_duplicate_state, - .atomic_destroy_state = dm_drm_plane_destroy_state, - .format_mod_supported = dm_plane_format_mod_supported, -}; - -static int dm_plane_helper_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) -{ - struct amdgpu_framebuffer *afb; - struct drm_gem_object *obj; - struct amdgpu_device *adev; - struct amdgpu_bo *rbo; - struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old; - uint32_t domain; - int r; - - if (!new_state->fb) { - DRM_DEBUG_KMS("No FB bound\n"); - return 0; - } - - afb = to_amdgpu_framebuffer(new_state->fb); - obj = new_state->fb->obj[0]; - rbo = gem_to_amdgpu_bo(obj); - adev = amdgpu_ttm_adev(rbo->tbo.bdev); - - r = amdgpu_bo_reserve(rbo, true); - if (r) { - dev_err(adev->dev, "fail to reserve bo (%d)\n", r); - return r; - } - - r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); - if (r) { - dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); - goto error_unlock; - } - - if (plane->type != DRM_PLANE_TYPE_CURSOR) - domain = amdgpu_display_supported_domains(adev, rbo->flags); - else - domain = AMDGPU_GEM_DOMAIN_VRAM; - - r = amdgpu_bo_pin(rbo, domain); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("Failed to pin framebuffer with error %d\n", r); - goto error_unlock; - } - - r = amdgpu_ttm_alloc_gart(&rbo->tbo); - if (unlikely(r != 0)) { - DRM_ERROR("%p bind failed\n", rbo); - goto error_unpin; - } - - r = drm_gem_plane_helper_prepare_fb(plane, new_state); - if (unlikely(r != 0)) - goto error_unpin; - - amdgpu_bo_unreserve(rbo); - - afb->address = amdgpu_bo_gpu_offset(rbo); - - amdgpu_bo_ref(rbo); - - /** - * We don't do surface updates on planes that have been newly created, - * but we also don't have the afb->address during atomic check. - * - * Fill in buffer attributes depending on the address here, but only on - * newly created planes since they're not being used by DC yet and this - * won't modify global state. 
- */ - dm_plane_state_old = to_dm_plane_state(plane->state); - dm_plane_state_new = to_dm_plane_state(new_state); - - if (dm_plane_state_new->dc_state && - dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) { - struct dc_plane_state *plane_state = - dm_plane_state_new->dc_state; - bool force_disable_dcc = !plane_state->dcc.enable; - - fill_plane_buffer_attributes( - adev, afb, plane_state->format, plane_state->rotation, - afb->tiling_flags, - &plane_state->tiling_info, &plane_state->plane_size, - &plane_state->dcc, &plane_state->address, - afb->tmz_surface, force_disable_dcc); - } - - return 0; - -error_unpin: - amdgpu_bo_unpin(rbo); - -error_unlock: - amdgpu_bo_unreserve(rbo); - return r; -} - -static void dm_plane_helper_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) -{ - struct amdgpu_bo *rbo; - int r; - - if (!old_state->fb) - return; - - rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); - r = amdgpu_bo_reserve(rbo, false); - if (unlikely(r)) { - DRM_ERROR("failed to reserve rbo before unpin\n"); - return; - } - - amdgpu_bo_unpin(rbo); - amdgpu_bo_unreserve(rbo); - amdgpu_bo_unref(&rbo); -} - -static int dm_plane_helper_check_state(struct drm_plane_state *state, - struct drm_crtc_state *new_crtc_state) -{ - struct drm_framebuffer *fb = state->fb; - int min_downscale, max_upscale; - int min_scale = 0; - int max_scale = INT_MAX; - - /* Plane enabled? Validate viewport and get scaling factors from plane caps. */ - if (fb && state->crtc) { - /* Validate viewport to cover the case when only the position changes */ - if (state->plane->type != DRM_PLANE_TYPE_CURSOR) { - int viewport_width = state->crtc_w; - int viewport_height = state->crtc_h; - - if (state->crtc_x < 0) - viewport_width += state->crtc_x; - else if (state->crtc_x + state->crtc_w > new_crtc_state->mode.crtc_hdisplay) - viewport_width = new_crtc_state->mode.crtc_hdisplay - state->crtc_x; - - if (state->crtc_y < 0) - viewport_height += state->crtc_y; - else if (state->crtc_y + state->crtc_h > new_crtc_state->mode.crtc_vdisplay) - viewport_height = new_crtc_state->mode.crtc_vdisplay - state->crtc_y; - - if (viewport_width < 0 || viewport_height < 0) { - DRM_DEBUG_ATOMIC("Plane completely outside of screen\n"); - return -EINVAL; - } else if (viewport_width < MIN_VIEWPORT_SIZE*2) { /* x2 for width is because of pipe-split. */ - DRM_DEBUG_ATOMIC("Viewport width %d smaller than %d\n", viewport_width, MIN_VIEWPORT_SIZE*2); - return -EINVAL; - } else if (viewport_height < MIN_VIEWPORT_SIZE) { - DRM_DEBUG_ATOMIC("Viewport height %d smaller than %d\n", viewport_height, MIN_VIEWPORT_SIZE); - return -EINVAL; - } - - } - - /* Get min/max allowed scaling factors from plane caps. */ - get_min_max_dc_plane_scaling(state->crtc->dev, fb, - &min_downscale, &max_upscale); - /* - * Convert to drm convention: 16.16 fixed point, instead of dc's - * 1.0 == 1000. Also drm scaling is src/dst instead of dc's - * dst/src, so min_scale = 1.0 / max_upscale, etc. 
- */ - min_scale = (1000 << 16) / max_upscale; - max_scale = (1000 << 16) / min_downscale; - } - - return drm_atomic_helper_check_plane_state( - state, new_crtc_state, min_scale, max_scale, true, true); -} - -static int dm_plane_atomic_check(struct drm_plane *plane, - struct drm_atomic_state *state) -{ - struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, - plane); - struct amdgpu_device *adev = drm_to_adev(plane->dev); - struct dc *dc = adev->dm.dc; - struct dm_plane_state *dm_plane_state; - struct dc_scaling_info scaling_info; - struct drm_crtc_state *new_crtc_state; - int ret; - - trace_amdgpu_dm_plane_atomic_check(new_plane_state); - - dm_plane_state = to_dm_plane_state(new_plane_state); - - if (!dm_plane_state->dc_state) - return 0; - - new_crtc_state = - drm_atomic_get_new_crtc_state(state, - new_plane_state->crtc); - if (!new_crtc_state) - return -EINVAL; - - ret = dm_plane_helper_check_state(new_plane_state, new_crtc_state); - if (ret) - return ret; - - ret = fill_dc_scaling_info(adev, new_plane_state, &scaling_info); - if (ret) - return ret; - - if (dc_validate_plane(dc, dm_plane_state->dc_state) == DC_OK) - return 0; - - return -EINVAL; -} - -static int dm_plane_atomic_async_check(struct drm_plane *plane, - struct drm_atomic_state *state) -{ - /* Only support async updates on cursor planes. */ - if (plane->type != DRM_PLANE_TYPE_CURSOR) - return -EINVAL; - - return 0; -} - -static void dm_plane_atomic_async_update(struct drm_plane *plane, - struct drm_atomic_state *state) -{ - struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, - plane); - struct drm_plane_state *old_state = - drm_atomic_get_old_plane_state(state, plane); - - trace_amdgpu_dm_atomic_update_cursor(new_state); - - swap(plane->state->fb, new_state->fb); - - plane->state->src_x = new_state->src_x; - plane->state->src_y = new_state->src_y; - plane->state->src_w = new_state->src_w; - plane->state->src_h = new_state->src_h; - plane->state->crtc_x = new_state->crtc_x; - plane->state->crtc_y = new_state->crtc_y; - plane->state->crtc_w = new_state->crtc_w; - plane->state->crtc_h = new_state->crtc_h; - - handle_cursor_update(plane, old_state); -} - -static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { - .prepare_fb = dm_plane_helper_prepare_fb, - .cleanup_fb = dm_plane_helper_cleanup_fb, - .atomic_check = dm_plane_atomic_check, - .atomic_async_check = dm_plane_atomic_async_check, - .atomic_async_update = dm_plane_atomic_async_update -}; - -/* - * TODO: these are currently initialized to rgb formats only. 
- * For future use cases we should either initialize them dynamically based on - * plane capabilities, or initialize this array to all formats, so internal drm - * check will succeed, and let DC implement proper check - */ -static const uint32_t rgb_formats[] = { - DRM_FORMAT_XRGB8888, - DRM_FORMAT_ARGB8888, - DRM_FORMAT_RGBA8888, - DRM_FORMAT_XRGB2101010, - DRM_FORMAT_XBGR2101010, - DRM_FORMAT_ARGB2101010, - DRM_FORMAT_ABGR2101010, - DRM_FORMAT_XRGB16161616, - DRM_FORMAT_XBGR16161616, - DRM_FORMAT_ARGB16161616, - DRM_FORMAT_ABGR16161616, - DRM_FORMAT_XBGR8888, - DRM_FORMAT_ABGR8888, - DRM_FORMAT_RGB565, -}; - -static const uint32_t overlay_formats[] = { - DRM_FORMAT_XRGB8888, - DRM_FORMAT_ARGB8888, - DRM_FORMAT_RGBA8888, - DRM_FORMAT_XBGR8888, - DRM_FORMAT_ABGR8888, - DRM_FORMAT_RGB565 -}; - -static const u32 cursor_formats[] = { - DRM_FORMAT_ARGB8888 -}; - -static int get_plane_formats(const struct drm_plane *plane, - const struct dc_plane_cap *plane_cap, - uint32_t *formats, int max_formats) -{ - int i, num_formats = 0; - - /* - * TODO: Query support for each group of formats directly from - * DC plane caps. This will require adding more formats to the - * caps list. - */ - - switch (plane->type) { - case DRM_PLANE_TYPE_PRIMARY: - for (i = 0; i < ARRAY_SIZE(rgb_formats); ++i) { - if (num_formats >= max_formats) - break; - - formats[num_formats++] = rgb_formats[i]; - } - - if (plane_cap && plane_cap->pixel_format_support.nv12) - formats[num_formats++] = DRM_FORMAT_NV12; - if (plane_cap && plane_cap->pixel_format_support.p010) - formats[num_formats++] = DRM_FORMAT_P010; - if (plane_cap && plane_cap->pixel_format_support.fp16) { - formats[num_formats++] = DRM_FORMAT_XRGB16161616F; - formats[num_formats++] = DRM_FORMAT_ARGB16161616F; - formats[num_formats++] = DRM_FORMAT_XBGR16161616F; - formats[num_formats++] = DRM_FORMAT_ABGR16161616F; - } - break; - - case DRM_PLANE_TYPE_OVERLAY: - for (i = 0; i < ARRAY_SIZE(overlay_formats); ++i) { - if (num_formats >= max_formats) - break; - - formats[num_formats++] = overlay_formats[i]; - } - break; - - case DRM_PLANE_TYPE_CURSOR: - for (i = 0; i < ARRAY_SIZE(cursor_formats); ++i) { - if (num_formats >= max_formats) - break; - - formats[num_formats++] = cursor_formats[i]; - } - break; - } - - return num_formats; -} - -static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - unsigned long possible_crtcs, - const struct dc_plane_cap *plane_cap) -{ - uint32_t formats[32]; - int num_formats; - int res = -EPERM; - unsigned int supported_rotations; - uint64_t *modifiers = NULL; - - num_formats = get_plane_formats(plane, plane_cap, formats, - ARRAY_SIZE(formats)); - - res = get_plane_modifiers(dm->adev, plane->type, &modifiers); - if (res) - return res; - - if (modifiers == NULL) - adev_to_drm(dm->adev)->mode_config.fb_modifiers_not_supported = true; - - res = drm_universal_plane_init(adev_to_drm(dm->adev), plane, possible_crtcs, - &dm_plane_funcs, formats, num_formats, - modifiers, plane->type, NULL); - kfree(modifiers); - if (res) - return res; - - if (plane->type == DRM_PLANE_TYPE_OVERLAY && - plane_cap && plane_cap->per_pixel_alpha) { - unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) | - BIT(DRM_MODE_BLEND_PREMULTI) | - BIT(DRM_MODE_BLEND_COVERAGE); - - drm_plane_create_alpha_property(plane); - drm_plane_create_blend_mode_property(plane, blend_caps); - } - - if (plane->type == DRM_PLANE_TYPE_PRIMARY && - plane_cap && - (plane_cap->pixel_format_support.nv12 || - plane_cap->pixel_format_support.p010)) { - /* 
This only affects YUV formats. */ - drm_plane_create_color_properties( - plane, - BIT(DRM_COLOR_YCBCR_BT601) | - BIT(DRM_COLOR_YCBCR_BT709) | - BIT(DRM_COLOR_YCBCR_BT2020), - BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | - BIT(DRM_COLOR_YCBCR_FULL_RANGE), - DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); - } - - supported_rotations = - DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | - DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; - - if (dm->adev->asic_type >= CHIP_BONAIRE && - plane->type != DRM_PLANE_TYPE_CURSOR) - drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, - supported_rotations); - - drm_plane_helper_add(plane, &dm_plane_helper_funcs); - - /* Create (reset) the plane state */ - if (plane->funcs->reset) - plane->funcs->reset(plane); - - return 0; -} - static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, uint32_t crtc_index) @@ -9084,114 +7730,6 @@ static void remove_stream(struct amdgpu_device *adev, acrtc->enabled = false; } -static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, - struct dc_cursor_position *position) -{ - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - int x, y; - int xorigin = 0, yorigin = 0; - - if (!crtc || !plane->state->fb) - return 0; - - if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) || - (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) { - DRM_ERROR("%s: bad cursor width or height %d x %d\n", - __func__, - plane->state->crtc_w, - plane->state->crtc_h); - return -EINVAL; - } - - x = plane->state->crtc_x; - y = plane->state->crtc_y; - - if (x <= -amdgpu_crtc->max_cursor_width || - y <= -amdgpu_crtc->max_cursor_height) - return 0; - - if (x < 0) { - xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1); - x = 0; - } - if (y < 0) { - yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1); - y = 0; - } - position->enable = true; - position->translate_by_source = true; - position->x = x; - position->y = y; - position->x_hotspot = xorigin; - position->y_hotspot = yorigin; - - return 0; -} - -static void handle_cursor_update(struct drm_plane *plane, - struct drm_plane_state *old_plane_state) -{ - struct amdgpu_device *adev = drm_to_adev(plane->dev); - struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb); - struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc; - struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL; - struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); - uint64_t address = afb ? 
afb->address : 0; - struct dc_cursor_position position = {0}; - struct dc_cursor_attributes attributes; - int ret; - - if (!plane->state->fb && !old_plane_state->fb) - return; - - DC_LOG_CURSOR("%s: crtc_id=%d with size %d to %d\n", - __func__, - amdgpu_crtc->crtc_id, - plane->state->crtc_w, - plane->state->crtc_h); - - ret = get_cursor_position(plane, crtc, &position); - if (ret) - return; - - if (!position.enable) { - /* turn off cursor */ - if (crtc_state && crtc_state->stream) { - mutex_lock(&adev->dm.dc_lock); - dc_stream_set_cursor_position(crtc_state->stream, - &position); - mutex_unlock(&adev->dm.dc_lock); - } - return; - } - - amdgpu_crtc->cursor_width = plane->state->crtc_w; - amdgpu_crtc->cursor_height = plane->state->crtc_h; - - memset(&attributes, 0, sizeof(attributes)); - attributes.address.high_part = upper_32_bits(address); - attributes.address.low_part = lower_32_bits(address); - attributes.width = plane->state->crtc_w; - attributes.height = plane->state->crtc_h; - attributes.color_format = CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA; - attributes.rotation_angle = 0; - attributes.attribute_flags.value = 0; - - attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; - - if (crtc_state->stream) { - mutex_lock(&adev->dm.dc_lock); - if (!dc_stream_set_cursor_attributes(crtc_state->stream, - &attributes)) - DRM_ERROR("DC failed to set cursor attributes\n"); - - if (!dc_stream_set_cursor_position(crtc_state->stream, - &position)) - DRM_ERROR("DC failed to set cursor position\n"); - mutex_unlock(&adev->dm.dc_lock); - } -} - static void prepare_flip_isr(struct amdgpu_crtc *acrtc) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c new file mode 100644 index 0000000000000..4702a53a0bf9c --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -0,0 +1,1645 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include +#include +#include +#include +#include + +#include "amdgpu.h" +#include "dal_asic_id.h" +#include "amdgpu_display.h" +#include "amdgpu_dm_trace.h" +#include "gc/gc_11_0_0_offset.h" +#include "gc/gc_11_0_0_sh_mask.h" + +/* + * TODO: these are currently initialized to rgb formats only. 
+ * For future use cases we should either initialize them dynamically based on + * plane capabilities, or initialize this array to all formats, so internal drm + * check will succeed, and let DC implement proper check + */ +static const uint32_t rgb_formats[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_ARGB2101010, + DRM_FORMAT_ABGR2101010, + DRM_FORMAT_XRGB16161616, + DRM_FORMAT_XBGR16161616, + DRM_FORMAT_ARGB16161616, + DRM_FORMAT_ABGR16161616, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGB565, +}; + +static const uint32_t overlay_formats[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGB565 +}; + +static const u32 cursor_formats[] = { + DRM_FORMAT_ARGB8888 +}; + +enum dm_micro_swizzle { + MICRO_SWIZZLE_Z = 0, + MICRO_SWIZZLE_S = 1, + MICRO_SWIZZLE_D = 2, + MICRO_SWIZZLE_R = 3 +}; + +const struct drm_format_info *amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd) +{ + return amdgpu_lookup_format_info(cmd->pixel_format, cmd->modifier[0]); +} + +void fill_blending_from_plane_state(const struct drm_plane_state *plane_state, + bool *per_pixel_alpha, bool *pre_multiplied_alpha, + bool *global_alpha, int *global_alpha_value) +{ + *per_pixel_alpha = false; + *pre_multiplied_alpha = true; + *global_alpha = false; + *global_alpha_value = 0xff; + + if (plane_state->plane->type != DRM_PLANE_TYPE_OVERLAY) + return; + + if (plane_state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI || + plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) { + static const uint32_t alpha_formats[] = { + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_ABGR8888, + }; + uint32_t format = plane_state->fb->format->format; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(alpha_formats); ++i) { + if (format == alpha_formats[i]) { + *per_pixel_alpha = true; + break; + } + } + + if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) + *pre_multiplied_alpha = false; + } + + if (plane_state->alpha < 0xffff) { + *global_alpha = true; + *global_alpha_value = plane_state->alpha >> 8; + } +} + +static void add_modifier(uint64_t **mods, uint64_t *size, uint64_t *cap, uint64_t mod) +{ + if (!*mods) + return; + + if (*cap - *size < 1) { + uint64_t new_cap = *cap * 2; + uint64_t *new_mods = kmalloc(new_cap * sizeof(uint64_t), GFP_KERNEL); + + if (!new_mods) { + kfree(*mods); + *mods = NULL; + return; + } + + memcpy(new_mods, *mods, sizeof(uint64_t) * *size); + kfree(*mods); + *mods = new_mods; + *cap = new_cap; + } + + (*mods)[*size] = mod; + *size += 1; +} + +bool modifier_has_dcc(uint64_t modifier) +{ + return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier); +} + +unsigned modifier_gfx9_swizzle_mode(uint64_t modifier) +{ + if (modifier == DRM_FORMAT_MOD_LINEAR) + return 0; + + return AMD_FMT_MOD_GET(TILE, modifier); +} + +static void fill_gfx8_tiling_info_from_flags(union dc_tiling_info *tiling_info, + uint64_t tiling_flags) +{ + /* Fill GFX8 params */ + if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == DC_ARRAY_2D_TILED_THIN1) { + unsigned int bankw, bankh, mtaspect, tile_split, num_banks; + + bankw = AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); + bankh = AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); + mtaspect = AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); + tile_split = AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT); + num_banks = AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); + + 
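/*
+	 * AMDGPU_TILING_GET(flags, FIELD) unpacks one bitfield from the packed
+	 * 64-bit tiling word, roughly
+	 * ((flags) >> AMDGPU_TILING_FIELD_SHIFT) & AMDGPU_TILING_FIELD_MASK,
+	 * so the GFX8 bank/tile geometry above comes straight from the BO's
+	 * tiling metadata.
+	 */
+	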
/* XXX fix me for VI */ + tiling_info->gfx8.num_banks = num_banks; + tiling_info->gfx8.array_mode = + DC_ARRAY_2D_TILED_THIN1; + tiling_info->gfx8.tile_split = tile_split; + tiling_info->gfx8.bank_width = bankw; + tiling_info->gfx8.bank_height = bankh; + tiling_info->gfx8.tile_aspect = mtaspect; + tiling_info->gfx8.tile_mode = + DC_ADDR_SURF_MICRO_TILING_DISPLAY; + } else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) + == DC_ARRAY_1D_TILED_THIN1) { + tiling_info->gfx8.array_mode = DC_ARRAY_1D_TILED_THIN1; + } + + tiling_info->gfx8.pipe_config = + AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); +} + +static void fill_gfx9_tiling_info_from_device(const struct amdgpu_device *adev, + union dc_tiling_info *tiling_info) +{ + /* Fill GFX9 params */ + tiling_info->gfx9.num_pipes = + adev->gfx.config.gb_addr_config_fields.num_pipes; + tiling_info->gfx9.num_banks = + adev->gfx.config.gb_addr_config_fields.num_banks; + tiling_info->gfx9.pipe_interleave = + adev->gfx.config.gb_addr_config_fields.pipe_interleave_size; + tiling_info->gfx9.num_shader_engines = + adev->gfx.config.gb_addr_config_fields.num_se; + tiling_info->gfx9.max_compressed_frags = + adev->gfx.config.gb_addr_config_fields.max_compress_frags; + tiling_info->gfx9.num_rb_per_se = + adev->gfx.config.gb_addr_config_fields.num_rb_per_se; + tiling_info->gfx9.shaderEnable = 1; + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) + tiling_info->gfx9.num_pkrs = adev->gfx.config.gb_addr_config_fields.num_pkrs; +} + +static void fill_gfx9_tiling_info_from_modifier(const struct amdgpu_device *adev, + union dc_tiling_info *tiling_info, + uint64_t modifier) +{ + unsigned int mod_bank_xor_bits = AMD_FMT_MOD_GET(BANK_XOR_BITS, modifier); + unsigned int mod_pipe_xor_bits = AMD_FMT_MOD_GET(PIPE_XOR_BITS, modifier); + unsigned int pkrs_log2 = AMD_FMT_MOD_GET(PACKERS, modifier); + unsigned int pipes_log2; + + pipes_log2 = min(5u, mod_pipe_xor_bits); + + fill_gfx9_tiling_info_from_device(adev, tiling_info); + + if (!IS_AMD_FMT_MOD(modifier)) + return; + + tiling_info->gfx9.num_pipes = 1u << pipes_log2; + tiling_info->gfx9.num_shader_engines = 1u << (mod_pipe_xor_bits - pipes_log2); + + if (adev->family >= AMDGPU_FAMILY_NV) { + tiling_info->gfx9.num_pkrs = 1u << pkrs_log2; + } else { + tiling_info->gfx9.num_banks = 1u << mod_bank_xor_bits; + + /* for DCC we know it isn't rb aligned, so rb_per_se doesn't matter. 
*/ + } +} + +static int validate_dcc(struct amdgpu_device *adev, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const union dc_tiling_info *tiling_info, + const struct dc_plane_dcc_param *dcc, + const struct dc_plane_address *address, + const struct plane_size *plane_size) +{ + struct dc *dc = adev->dm.dc; + struct dc_dcc_surface_param input; + struct dc_surface_dcc_cap output; + + memset(&input, 0, sizeof(input)); + memset(&output, 0, sizeof(output)); + + if (!dcc->enable) + return 0; + + if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || + !dc->cap_funcs.get_dcc_compression_cap) + return -EINVAL; + + input.format = format; + input.surface_size.width = plane_size->surface_size.width; + input.surface_size.height = plane_size->surface_size.height; + input.swizzle_mode = tiling_info->gfx9.swizzle; + + if (rotation == ROTATION_ANGLE_0 || rotation == ROTATION_ANGLE_180) + input.scan = SCAN_DIRECTION_HORIZONTAL; + else if (rotation == ROTATION_ANGLE_90 || rotation == ROTATION_ANGLE_270) + input.scan = SCAN_DIRECTION_VERTICAL; + + if (!dc->cap_funcs.get_dcc_compression_cap(dc, &input, &output)) + return -EINVAL; + + if (!output.capable) + return -EINVAL; + + if (dcc->independent_64b_blks == 0 && + output.grph.rgb.independent_64b_blks != 0) + return -EINVAL; + + return 0; +} + +static int fill_gfx9_plane_attributes_from_modifiers(struct amdgpu_device *adev, + const struct amdgpu_framebuffer *afb, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const struct plane_size *plane_size, + union dc_tiling_info *tiling_info, + struct dc_plane_dcc_param *dcc, + struct dc_plane_address *address, + const bool force_disable_dcc) +{ + const uint64_t modifier = afb->base.modifier; + int ret = 0; + + fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); + tiling_info->gfx9.swizzle = modifier_gfx9_swizzle_mode(modifier); + + if (modifier_has_dcc(modifier) && !force_disable_dcc) { + uint64_t dcc_address = afb->address + afb->base.offsets[1]; + bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); + bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier); + + dcc->enable = 1; + dcc->meta_pitch = afb->base.pitches[1]; + dcc->independent_64b_blks = independent_64b_blks; + if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) { + if (independent_64b_blks && independent_128b_blks) + dcc->dcc_ind_blk = hubp_ind_block_64b_no_128bcl; + else if (independent_128b_blks) + dcc->dcc_ind_blk = hubp_ind_block_128b; + else if (independent_64b_blks && !independent_128b_blks) + dcc->dcc_ind_blk = hubp_ind_block_64b; + else + dcc->dcc_ind_blk = hubp_ind_block_unconstrained; + } else { + if (independent_64b_blks) + dcc->dcc_ind_blk = hubp_ind_block_64b; + else + dcc->dcc_ind_blk = hubp_ind_block_unconstrained; + } + + address->grph.meta_addr.low_part = lower_32_bits(dcc_address); + address->grph.meta_addr.high_part = upper_32_bits(dcc_address); + } + + ret = validate_dcc(adev, format, rotation, tiling_info, dcc, address, plane_size); + if (ret) + drm_dbg_kms(adev_to_drm(adev), "validate_dcc: returned error: %d\n", ret); + + return ret; +} + +static void add_gfx10_1_modifiers(const struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + 
AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits)); + + + /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); +} + +static void add_gfx9_modifiers(const struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + int pipe_xor_bits = min(8, pipes + + ilog2(adev->gfx.config.gb_addr_config_fields.num_se)); + int bank_xor_bits = min(8 - pipe_xor_bits, + ilog2(adev->gfx.config.gb_addr_config_fields.num_banks)); + int rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) + + ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se); + + + if (adev->family == AMDGPU_FAMILY_RV) { + /* Raven2 and later */ + bool has_constant_encode = adev->asic_type > CHIP_RAVEN || adev->external_rev_id >= 0x81; + + /* + * No _D DCC swizzles yet because we only allow 32bpp, which + * doesn't support _D on DCN + */ + + if (has_constant_encode) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1)); + } + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0)); + + if (has_constant_encode) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, 
AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(RB, rb) | + AMD_FMT_MOD_SET(PIPE, pipes)); + } + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 0) | + AMD_FMT_MOD_SET(RB, rb) | + AMD_FMT_MOD_SET(PIPE, pipes)); + } + + /* + * Only supported for 64bpp on Raven, will be filtered on format in + * dm_plane_format_mod_supported. + */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); + + if (adev->family == AMDGPU_FAMILY_RV) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits)); + } + + /* + * Only supported for 64bpp on Raven, will be filtered on format in + * dm_plane_format_mod_supported. 
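+	 * (as the comment above notes, 32bpp cannot use the _D swizzle on DCN).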
+ */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + + if (adev->family == AMDGPU_FAMILY_RV) { + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + } +} + +static void add_gfx10_3_modifiers(const struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int pipe_xor_bits = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); + int pkrs = ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs) | + AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_RETILE, 1) | + AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(PACKERS, pkrs)); + + /* Only supported for 64bpp, will be filtered in dm_plane_format_mod_supported */ + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) | + 
AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9)); +} + +static void add_gfx11_modifiers(struct amdgpu_device *adev, + uint64_t **mods, uint64_t *size, uint64_t *capacity) +{ + int num_pipes = 0; + int pipe_xor_bits = 0; + int num_pkrs = 0; + int pkrs = 0; + u32 gb_addr_config; + u8 i = 0; + unsigned swizzle_r_x; + uint64_t modifier_r_x; + uint64_t modifier_dcc_best; + uint64_t modifier_dcc_4k; + + /* TODO: GFX11 IP HW init hasnt finish and we get zero if we read from + * adev->gfx.config.gb_addr_config_fields.num_{pkrs,pipes} + */ + gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG); + ASSERT(gb_addr_config != 0); + + num_pkrs = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); + pkrs = ilog2(num_pkrs); + num_pipes = 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PIPES); + pipe_xor_bits = ilog2(num_pipes); + + for (i = 0; i < 2; i++) { + /* Insert the best one first. */ + /* R_X swizzle modes are the best for rendering and DCC requires them. */ + if (num_pipes > 16) + swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX11_256K_R_X : AMD_FMT_MOD_TILE_GFX9_64K_R_X; + else + swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX9_64K_R_X : AMD_FMT_MOD_TILE_GFX11_256K_R_X; + + modifier_r_x = AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | + AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) | + AMD_FMT_MOD_SET(TILE, swizzle_r_x) | + AMD_FMT_MOD_SET(PACKERS, pkrs); + + /* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */ + modifier_dcc_best = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B); + + /* DCC settings for 4K and greater resolutions. (required by display hw) */ + modifier_dcc_4k = modifier_r_x | AMD_FMT_MOD_SET(DCC, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) | + AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) | + AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B); + + add_modifier(mods, size, capacity, modifier_dcc_best); + add_modifier(mods, size, capacity, modifier_dcc_4k); + + add_modifier(mods, size, capacity, modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1)); + add_modifier(mods, size, capacity, modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1)); + + add_modifier(mods, size, capacity, modifier_r_x); + } + + add_modifier(mods, size, capacity, AMD_FMT_MOD | + AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) | + AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D)); +} + +static int get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_type, uint64_t **mods) +{ + uint64_t size = 0, capacity = 128; + *mods = NULL; + + /* We have not hooked up any pre-GFX9 modifiers. */ + if (adev->family < AMDGPU_FAMILY_AI) + return 0; + + *mods = kmalloc(capacity * sizeof(uint64_t), GFP_KERNEL); + + if (plane_type == DRM_PLANE_TYPE_CURSOR) { + add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); + add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID); + return *mods ? 
0 : -ENOMEM; + } + + switch (adev->family) { + case AMDGPU_FAMILY_AI: + case AMDGPU_FAMILY_RV: + add_gfx9_modifiers(adev, mods, &size, &capacity); + break; + case AMDGPU_FAMILY_NV: + case AMDGPU_FAMILY_VGH: + case AMDGPU_FAMILY_YC: + case AMDGPU_FAMILY_GC_10_3_6: + case AMDGPU_FAMILY_GC_10_3_7: + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) + add_gfx10_3_modifiers(adev, mods, &size, &capacity); + else + add_gfx10_1_modifiers(adev, mods, &size, &capacity); + break; + case AMDGPU_FAMILY_GC_11_0_0: + case AMDGPU_FAMILY_GC_11_0_2: + add_gfx11_modifiers(adev, mods, &size, &capacity); + break; + } + + add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_LINEAR); + + /* INVALID marks the end of the list. */ + add_modifier(mods, &size, &capacity, DRM_FORMAT_MOD_INVALID); + + if (!*mods) + return -ENOMEM; + + return 0; +} + +static int get_plane_formats(const struct drm_plane *plane, + const struct dc_plane_cap *plane_cap, + uint32_t *formats, int max_formats) +{ + int i, num_formats = 0; + + /* + * TODO: Query support for each group of formats directly from + * DC plane caps. This will require adding more formats to the + * caps list. + */ + + switch (plane->type) { + case DRM_PLANE_TYPE_PRIMARY: + for (i = 0; i < ARRAY_SIZE(rgb_formats); ++i) { + if (num_formats >= max_formats) + break; + + formats[num_formats++] = rgb_formats[i]; + } + + if (plane_cap && plane_cap->pixel_format_support.nv12) + formats[num_formats++] = DRM_FORMAT_NV12; + if (plane_cap && plane_cap->pixel_format_support.p010) + formats[num_formats++] = DRM_FORMAT_P010; + if (plane_cap && plane_cap->pixel_format_support.fp16) { + formats[num_formats++] = DRM_FORMAT_XRGB16161616F; + formats[num_formats++] = DRM_FORMAT_ARGB16161616F; + formats[num_formats++] = DRM_FORMAT_XBGR16161616F; + formats[num_formats++] = DRM_FORMAT_ABGR16161616F; + } + break; + + case DRM_PLANE_TYPE_OVERLAY: + for (i = 0; i < ARRAY_SIZE(overlay_formats); ++i) { + if (num_formats >= max_formats) + break; + + formats[num_formats++] = overlay_formats[i]; + } + break; + + case DRM_PLANE_TYPE_CURSOR: + for (i = 0; i < ARRAY_SIZE(cursor_formats); ++i) { + if (num_formats >= max_formats) + break; + + formats[num_formats++] = cursor_formats[i]; + } + break; + } + + return num_formats; +} + +#ifdef CONFIG_DRM_AMD_DC_HDR +static int attach_color_mgmt_properties(struct amdgpu_display_manager *dm, struct drm_plane *plane) +{ + drm_object_attach_property(&plane->base, + dm->degamma_lut_property, + 0); + drm_object_attach_property(&plane->base, + dm->degamma_lut_size_property, + MAX_COLOR_LUT_ENTRIES); + drm_object_attach_property(&plane->base, dm->ctm_property, + 0); + drm_object_attach_property(&plane->base, dm->sdr_boost_property, + DEFAULT_SDR_BOOST); + + return 0; +} +#endif + +int fill_plane_buffer_attributes(struct amdgpu_device *adev, + const struct amdgpu_framebuffer *afb, + const enum surface_pixel_format format, + const enum dc_rotation_angle rotation, + const uint64_t tiling_flags, + union dc_tiling_info *tiling_info, + struct plane_size *plane_size, + struct dc_plane_dcc_param *dcc, + struct dc_plane_address *address, + bool tmz_surface, + bool force_disable_dcc) +{ + const struct drm_framebuffer *fb = &afb->base; + int ret; + + memset(tiling_info, 0, sizeof(*tiling_info)); + memset(plane_size, 0, sizeof(*plane_size)); + memset(dcc, 0, sizeof(*dcc)); + memset(address, 0, sizeof(*address)); + + address->tmz_surface = tmz_surface; + + if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { + uint64_t addr = afb->address + fb->offsets[0]; + + 
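/*
+		 * Single-plane (RGB) case: the GPU address is the BO base plus
+		 * the framebuffer offset, and the pitch is converted from bytes
+		 * to pixels; e.g. for XRGB8888 (cpp = 4), pitches[0] = 4 * width
+		 * gives surface_pitch = width.
+		 */
+		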
plane_size->surface_size.x = 0; + plane_size->surface_size.y = 0; + plane_size->surface_size.width = fb->width; + plane_size->surface_size.height = fb->height; + plane_size->surface_pitch = + fb->pitches[0] / fb->format->cpp[0]; + + address->type = PLN_ADDR_TYPE_GRAPHICS; + address->grph.addr.low_part = lower_32_bits(addr); + address->grph.addr.high_part = upper_32_bits(addr); + } else if (format < SURFACE_PIXEL_FORMAT_INVALID) { + uint64_t luma_addr = afb->address + fb->offsets[0]; + uint64_t chroma_addr = afb->address + fb->offsets[1]; + + plane_size->surface_size.x = 0; + plane_size->surface_size.y = 0; + plane_size->surface_size.width = fb->width; + plane_size->surface_size.height = fb->height; + plane_size->surface_pitch = + fb->pitches[0] / fb->format->cpp[0]; + + plane_size->chroma_size.x = 0; + plane_size->chroma_size.y = 0; + /* TODO: set these based on surface format */ + plane_size->chroma_size.width = fb->width / 2; + plane_size->chroma_size.height = fb->height / 2; + + plane_size->chroma_pitch = + fb->pitches[1] / fb->format->cpp[1]; + + address->type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE; + address->video_progressive.luma_addr.low_part = + lower_32_bits(luma_addr); + address->video_progressive.luma_addr.high_part = + upper_32_bits(luma_addr); + address->video_progressive.chroma_addr.low_part = + lower_32_bits(chroma_addr); + address->video_progressive.chroma_addr.high_part = + upper_32_bits(chroma_addr); + } + + if (adev->family >= AMDGPU_FAMILY_AI) { + ret = fill_gfx9_plane_attributes_from_modifiers(adev, afb, format, + rotation, plane_size, + tiling_info, dcc, + address, + force_disable_dcc); + if (ret) + return ret; + } else { + fill_gfx8_tiling_info_from_flags(tiling_info, tiling_flags); + } + + return 0; +} + +static int dm_plane_helper_prepare_fb(struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + struct amdgpu_framebuffer *afb; + struct drm_gem_object *obj; + struct amdgpu_device *adev; + struct amdgpu_bo *rbo; + struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old; + uint32_t domain; + int r; + + if (!new_state->fb) { + DRM_DEBUG_KMS("No FB bound\n"); + return 0; + } + + afb = to_amdgpu_framebuffer(new_state->fb); + obj = new_state->fb->obj[0]; + rbo = gem_to_amdgpu_bo(obj); + adev = amdgpu_ttm_adev(rbo->tbo.bdev); + + r = amdgpu_bo_reserve(rbo, true); + if (r) { + dev_err(adev->dev, "fail to reserve bo (%d)\n", r); + return r; + } + + r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); + if (r) { + dev_err(adev->dev, "reserving fence slot failed (%d)\n", r); + goto error_unlock; + } + + if (plane->type != DRM_PLANE_TYPE_CURSOR) + domain = amdgpu_display_supported_domains(adev, rbo->flags); + else + domain = AMDGPU_GEM_DOMAIN_VRAM; + + r = amdgpu_bo_pin(rbo, domain); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("Failed to pin framebuffer with error %d\n", r); + goto error_unlock; + } + + r = amdgpu_ttm_alloc_gart(&rbo->tbo); + if (unlikely(r != 0)) { + DRM_ERROR("%p bind failed\n", rbo); + goto error_unpin; + } + + r = drm_gem_plane_helper_prepare_fb(plane, new_state); + if (unlikely(r != 0)) + goto error_unpin; + + amdgpu_bo_unreserve(rbo); + + afb->address = amdgpu_bo_gpu_offset(rbo); + + amdgpu_bo_ref(rbo); + + /** + * We don't do surface updates on planes that have been newly created, + * but we also don't have the afb->address during atomic check. + * + * Fill in buffer attributes depending on the address here, but only on + * newly created planes since they're not being used by DC yet and this + * won't modify global state. 
+ */ + dm_plane_state_old = to_dm_plane_state(plane->state); + dm_plane_state_new = to_dm_plane_state(new_state); + + if (dm_plane_state_new->dc_state && + dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) { + struct dc_plane_state *plane_state = + dm_plane_state_new->dc_state; + bool force_disable_dcc = !plane_state->dcc.enable; + + fill_plane_buffer_attributes( + adev, afb, plane_state->format, plane_state->rotation, + afb->tiling_flags, + &plane_state->tiling_info, &plane_state->plane_size, + &plane_state->dcc, &plane_state->address, + afb->tmz_surface, force_disable_dcc); + } + + return 0; + +error_unpin: + amdgpu_bo_unpin(rbo); + +error_unlock: + amdgpu_bo_unreserve(rbo); + return r; +} + +static void dm_plane_helper_cleanup_fb(struct drm_plane *plane, + struct drm_plane_state *old_state) +{ + struct amdgpu_bo *rbo; + int r; + + if (!old_state->fb) + return; + + rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]); + r = amdgpu_bo_reserve(rbo, false); + if (unlikely(r)) { + DRM_ERROR("failed to reserve rbo before unpin\n"); + return; + } + + amdgpu_bo_unpin(rbo); + amdgpu_bo_unreserve(rbo); + amdgpu_bo_unref(&rbo); +} + +static void get_min_max_dc_plane_scaling(struct drm_device *dev, + struct drm_framebuffer *fb, + int *min_downscale, int *max_upscale) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + struct dc *dc = adev->dm.dc; + /* Caps for all supported planes are the same on DCE and DCN 1 - 3 */ + struct dc_plane_cap *plane_cap = &dc->caps.planes[0]; + + switch (fb->format->format) { + case DRM_FORMAT_P010: + case DRM_FORMAT_NV12: + case DRM_FORMAT_NV21: + *max_upscale = plane_cap->max_upscale_factor.nv12; + *min_downscale = plane_cap->max_downscale_factor.nv12; + break; + + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + *max_upscale = plane_cap->max_upscale_factor.fp16; + *min_downscale = plane_cap->max_downscale_factor.fp16; + break; + + default: + *max_upscale = plane_cap->max_upscale_factor.argb8888; + *min_downscale = plane_cap->max_downscale_factor.argb8888; + break; + } + + /* + * A factor of 1 in the plane_cap means to not allow scaling, ie. use a + * scaling factor of 1.0 == 1000 units. + */ + if (*max_upscale == 1) + *max_upscale = 1000; + + if (*min_downscale == 1) + *min_downscale = 1000; +} + +int dm_plane_helper_check_state(struct drm_plane_state *state, + struct drm_crtc_state *new_crtc_state) +{ + struct drm_framebuffer *fb = state->fb; + int min_downscale, max_upscale; + int min_scale = 0; + int max_scale = INT_MAX; + + /* Plane enabled? Validate viewport and get scaling factors from plane caps. */ + if (fb && state->crtc) { + /* Validate viewport to cover the case when only the position changes */ + if (state->plane->type != DRM_PLANE_TYPE_CURSOR) { + int viewport_width = state->crtc_w; + int viewport_height = state->crtc_h; + + if (state->crtc_x < 0) + viewport_width += state->crtc_x; + else if (state->crtc_x + state->crtc_w > new_crtc_state->mode.crtc_hdisplay) + viewport_width = new_crtc_state->mode.crtc_hdisplay - state->crtc_x; + + if (state->crtc_y < 0) + viewport_height += state->crtc_y; + else if (state->crtc_y + state->crtc_h > new_crtc_state->mode.crtc_vdisplay) + viewport_height = new_crtc_state->mode.crtc_vdisplay - state->crtc_y; + + if (viewport_width < 0 || viewport_height < 0) { + DRM_DEBUG_ATOMIC("Plane completely outside of screen\n"); + return -EINVAL; + } else if (viewport_width < MIN_VIEWPORT_SIZE*2) { /* x2 for width is because of pipe-split. 
*/ + DRM_DEBUG_ATOMIC("Viewport width %d smaller than %d\n", viewport_width, MIN_VIEWPORT_SIZE*2); + return -EINVAL; + } else if (viewport_height < MIN_VIEWPORT_SIZE) { + DRM_DEBUG_ATOMIC("Viewport height %d smaller than %d\n", viewport_height, MIN_VIEWPORT_SIZE); + return -EINVAL; + } + + } + + /* Get min/max allowed scaling factors from plane caps. */ + get_min_max_dc_plane_scaling(state->crtc->dev, fb, + &min_downscale, &max_upscale); + /* + * Convert to drm convention: 16.16 fixed point, instead of dc's + * 1.0 == 1000. Also drm scaling is src/dst instead of dc's + * dst/src, so min_scale = 1.0 / max_upscale, etc. + */ + min_scale = (1000 << 16) / max_upscale; + max_scale = (1000 << 16) / min_downscale; + } + + return drm_atomic_helper_check_plane_state( + state, new_crtc_state, min_scale, max_scale, true, true); +} + +int fill_dc_scaling_info(struct amdgpu_device *adev, + const struct drm_plane_state *state, + struct dc_scaling_info *scaling_info) +{ + int scale_w, scale_h, min_downscale, max_upscale; + + memset(scaling_info, 0, sizeof(*scaling_info)); + + /* Source is fixed 16.16 but we ignore mantissa for now... */ + scaling_info->src_rect.x = state->src_x >> 16; + scaling_info->src_rect.y = state->src_y >> 16; + + /* + * For reasons we don't (yet) fully understand a non-zero + * src_y coordinate into an NV12 buffer can cause a + * system hang on DCN1x. + * To avoid hangs (and maybe be overly cautious) + * let's reject both non-zero src_x and src_y. + * + * We currently know of only one use-case to reproduce a + * scenario with non-zero src_x and src_y for NV12, which + * is to gesture the YouTube Android app into full screen + * on ChromeOS. + */ + if (((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) || + (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) && + (state->fb && state->fb->format->format == DRM_FORMAT_NV12 && + (scaling_info->src_rect.x != 0 || scaling_info->src_rect.y != 0))) + return -EINVAL; + + scaling_info->src_rect.width = state->src_w >> 16; + if (scaling_info->src_rect.width == 0) + return -EINVAL; + + scaling_info->src_rect.height = state->src_h >> 16; + if (scaling_info->src_rect.height == 0) + return -EINVAL; + + scaling_info->dst_rect.x = state->crtc_x; + scaling_info->dst_rect.y = state->crtc_y; + + if (state->crtc_w == 0) + return -EINVAL; + + scaling_info->dst_rect.width = state->crtc_w; + + if (state->crtc_h == 0) + return -EINVAL; + + scaling_info->dst_rect.height = state->crtc_h; + + /* DRM doesn't specify clipping on destination output. */ + scaling_info->clip_rect = scaling_info->dst_rect; + + /* Validate scaling per-format with DC plane caps */ + if (state->plane && state->plane->dev && state->fb) { + get_min_max_dc_plane_scaling(state->plane->dev, state->fb, + &min_downscale, &max_upscale); + } else { + min_downscale = 250; + max_upscale = 16000; + } + + scale_w = scaling_info->dst_rect.width * 1000 / + scaling_info->src_rect.width; + + if (scale_w < min_downscale || scale_w > max_upscale) + return -EINVAL; + + scale_h = scaling_info->dst_rect.height * 1000 / + scaling_info->src_rect.height; + + if (scale_h < min_downscale || scale_h > max_upscale) + return -EINVAL; + + /* + * The "scaling_quality" can be ignored for now, quality = 0 has DC + * assume reasonable defaults based on the format. 
+ */ + + return 0; +} + +static int dm_plane_atomic_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, + plane); + struct amdgpu_device *adev = drm_to_adev(plane->dev); + struct dc *dc = adev->dm.dc; + struct dm_plane_state *dm_plane_state; + struct dc_scaling_info scaling_info; + struct drm_crtc_state *new_crtc_state; + int ret; + + trace_amdgpu_dm_plane_atomic_check(new_plane_state); + + dm_plane_state = to_dm_plane_state(new_plane_state); + + if (!dm_plane_state->dc_state) + return 0; + + new_crtc_state = + drm_atomic_get_new_crtc_state(state, + new_plane_state->crtc); + if (!new_crtc_state) + return -EINVAL; + + ret = dm_plane_helper_check_state(new_plane_state, new_crtc_state); + if (ret) + return ret; + + ret = fill_dc_scaling_info(adev, new_plane_state, &scaling_info); + if (ret) + return ret; + + if (dc_validate_plane(dc, dm_plane_state->dc_state) == DC_OK) + return 0; + + return -EINVAL; +} + +static int dm_plane_atomic_async_check(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + /* Only support async updates on cursor planes. */ + if (plane->type != DRM_PLANE_TYPE_CURSOR) + return -EINVAL; + + return 0; +} + +static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, + struct dc_cursor_position *position) +{ + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + int x, y; + int xorigin = 0, yorigin = 0; + + if (!crtc || !plane->state->fb) + return 0; + + if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) || + (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) { + DRM_ERROR("%s: bad cursor width or height %d x %d\n", + __func__, + plane->state->crtc_w, + plane->state->crtc_h); + return -EINVAL; + } + + x = plane->state->crtc_x; + y = plane->state->crtc_y; + + if (x <= -amdgpu_crtc->max_cursor_width || + y <= -amdgpu_crtc->max_cursor_height) + return 0; + + if (x < 0) { + xorigin = min(-x, amdgpu_crtc->max_cursor_width - 1); + x = 0; + } + if (y < 0) { + yorigin = min(-y, amdgpu_crtc->max_cursor_height - 1); + y = 0; + } + position->enable = true; + position->translate_by_source = true; + position->x = x; + position->y = y; + position->x_hotspot = xorigin; + position->y_hotspot = yorigin; + + return 0; +} + +void handle_cursor_update(struct drm_plane *plane, + struct drm_plane_state *old_plane_state) +{ + struct amdgpu_device *adev = drm_to_adev(plane->dev); + struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb); + struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc; + struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL; + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + uint64_t address = afb ? 
afb->address : 0; + struct dc_cursor_position position = {0}; + struct dc_cursor_attributes attributes; + int ret; + + if (!plane->state->fb && !old_plane_state->fb) + return; + + DC_LOG_CURSOR("%s: crtc_id=%d with size %d to %d\n", + __func__, + amdgpu_crtc->crtc_id, + plane->state->crtc_w, + plane->state->crtc_h); + + ret = get_cursor_position(plane, crtc, &position); + if (ret) + return; + + if (!position.enable) { + /* turn off cursor */ + if (crtc_state && crtc_state->stream) { + mutex_lock(&adev->dm.dc_lock); + dc_stream_set_cursor_position(crtc_state->stream, + &position); + mutex_unlock(&adev->dm.dc_lock); + } + return; + } + + amdgpu_crtc->cursor_width = plane->state->crtc_w; + amdgpu_crtc->cursor_height = plane->state->crtc_h; + + memset(&attributes, 0, sizeof(attributes)); + attributes.address.high_part = upper_32_bits(address); + attributes.address.low_part = lower_32_bits(address); + attributes.width = plane->state->crtc_w; + attributes.height = plane->state->crtc_h; + attributes.color_format = CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA; + attributes.rotation_angle = 0; + attributes.attribute_flags.value = 0; + + attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0]; + + if (crtc_state->stream) { + mutex_lock(&adev->dm.dc_lock); + if (!dc_stream_set_cursor_attributes(crtc_state->stream, + &attributes)) + DRM_ERROR("DC failed to set cursor attributes\n"); + + if (!dc_stream_set_cursor_position(crtc_state->stream, + &position)) + DRM_ERROR("DC failed to set cursor position\n"); + mutex_unlock(&adev->dm.dc_lock); + } +} + +static void dm_plane_atomic_async_update(struct drm_plane *plane, + struct drm_atomic_state *state) +{ + struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, + plane); + struct drm_plane_state *old_state = + drm_atomic_get_old_plane_state(state, plane); + + trace_amdgpu_dm_atomic_update_cursor(new_state); + + swap(plane->state->fb, new_state->fb); + + plane->state->src_x = new_state->src_x; + plane->state->src_y = new_state->src_y; + plane->state->src_w = new_state->src_w; + plane->state->src_h = new_state->src_h; + plane->state->crtc_x = new_state->crtc_x; + plane->state->crtc_y = new_state->crtc_y; + plane->state->crtc_w = new_state->crtc_w; + plane->state->crtc_h = new_state->crtc_h; + + handle_cursor_update(plane, old_state); +} + +static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { + .prepare_fb = dm_plane_helper_prepare_fb, + .cleanup_fb = dm_plane_helper_cleanup_fb, + .atomic_check = dm_plane_atomic_check, + .atomic_async_check = dm_plane_atomic_async_check, + .atomic_async_update = dm_plane_atomic_async_update +}; + +static void dm_drm_plane_reset(struct drm_plane *plane) +{ + struct dm_plane_state *amdgpu_state = NULL; + + if (plane->state) + plane->funcs->atomic_destroy_state(plane, plane->state); + + amdgpu_state = kzalloc(sizeof(*amdgpu_state), GFP_KERNEL); + WARN_ON(amdgpu_state == NULL); + + if (amdgpu_state) + __drm_atomic_helper_plane_reset(plane, &amdgpu_state->base); +#ifdef CONFIG_DRM_AMD_DC_HDR + if (amdgpu_state) + amdgpu_state->sdr_boost = DEFAULT_SDR_BOOST; +#endif +} + +static struct drm_plane_state * +dm_drm_plane_duplicate_state(struct drm_plane *plane) +{ + struct dm_plane_state *dm_plane_state, *old_dm_plane_state; + + old_dm_plane_state = to_dm_plane_state(plane->state); + dm_plane_state = kzalloc(sizeof(*dm_plane_state), GFP_KERNEL); + if (!dm_plane_state) + return NULL; + + __drm_atomic_helper_plane_duplicate_state(plane, &dm_plane_state->base); + + if (old_dm_plane_state->dc_state) { 
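+		/*
+		 * Reuse the dc_plane_state from the old plane state and take
+		 * an extra reference; dm_drm_plane_destroy_state() drops it
+		 * again via dc_plane_state_release().
+		 */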
+ dm_plane_state->dc_state = old_dm_plane_state->dc_state; + dc_plane_state_retain(dm_plane_state->dc_state); + } + +#ifdef CONFIG_DRM_AMD_DC_HDR + if (dm_plane_state->degamma_lut) + drm_property_blob_get(dm_plane_state->degamma_lut); + if (dm_plane_state->ctm) + drm_property_blob_get(dm_plane_state->ctm); + + dm_plane_state->sdr_boost = old_dm_plane_state->sdr_boost; +#endif + + return &dm_plane_state->base; +} + +static bool dm_plane_format_mod_supported(struct drm_plane *plane, + uint32_t format, + uint64_t modifier) +{ + struct amdgpu_device *adev = drm_to_adev(plane->dev); + const struct drm_format_info *info = drm_format_info(format); + struct hw_asic_id asic_id = adev->dm.dc->ctx->asic_id; + + enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) & 3; + + if (!info) + return false; + + /* + * We always have to allow these modifiers: + * 1. Core DRM checks for LINEAR support if userspace does not provide modifiers. + * 2. Not passing any modifiers is the same as explicitly passing INVALID. + */ + if (modifier == DRM_FORMAT_MOD_LINEAR || + modifier == DRM_FORMAT_MOD_INVALID) { + return true; + } + + /* check if swizzle mode is supported by this version of DCN */ + switch (asic_id.chip_family) { + case FAMILY_SI: + case FAMILY_CI: + case FAMILY_KV: + case FAMILY_CZ: + case FAMILY_VI: + /* asics before AI does not have modifier support */ + return false; + case FAMILY_AI: + case FAMILY_RV: + case FAMILY_NV: + case FAMILY_VGH: + case FAMILY_YELLOW_CARP: + case AMDGPU_FAMILY_GC_10_3_6: + case AMDGPU_FAMILY_GC_10_3_7: + switch (AMD_FMT_MOD_GET(TILE, modifier)) { + case AMD_FMT_MOD_TILE_GFX9_64K_R_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D_X: + case AMD_FMT_MOD_TILE_GFX9_64K_S_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D: + return true; + default: + return false; + } + break; + case AMDGPU_FAMILY_GC_11_0_0: + switch (AMD_FMT_MOD_GET(TILE, modifier)) { + case AMD_FMT_MOD_TILE_GFX11_256K_R_X: + case AMD_FMT_MOD_TILE_GFX9_64K_R_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D_X: + case AMD_FMT_MOD_TILE_GFX9_64K_S_X: + case AMD_FMT_MOD_TILE_GFX9_64K_D: + return true; + default: + return false; + } + break; + default: + ASSERT(0); /* Unknown asic */ + break; + } + + /* + * For D swizzle the canonical modifier depends on the bpp, so check + * it here. + */ + if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 && + adev->family >= AMDGPU_FAMILY_NV) { + if (microtile == MICRO_SWIZZLE_D && info->cpp[0] == 4) + return false; + } + + if (adev->family >= AMDGPU_FAMILY_RV && microtile == MICRO_SWIZZLE_D && + info->cpp[0] < 8) + return false; + + if (modifier_has_dcc(modifier)) { + /* Per radeonsi comments 16/64 bpp are more complicated. */ + if (info->cpp[0] != 4) + return false; + /* We support multi-planar formats, but not when combined with + * additional DCC metadata planes. 
+ */ + if (info->num_planes > 1) + return false; + } + + return true; +} + +static void dm_drm_plane_destroy_state(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + +#ifdef CONFIG_DRM_AMD_DC_HDR + drm_property_blob_put(dm_plane_state->degamma_lut); + drm_property_blob_put(dm_plane_state->ctm); +#endif + if (dm_plane_state->dc_state) + dc_plane_state_release(dm_plane_state->dc_state); + + drm_atomic_helper_plane_destroy_state(plane, state); +} + +#ifdef CONFIG_DRM_AMD_DC_HDR +/* copied from drm_atomic_uapi.c */ +static int atomic_replace_property_blob_from_id(struct drm_device *dev, + struct drm_property_blob **blob, + uint64_t blob_id, + ssize_t expected_size, + ssize_t expected_elem_size, + bool *replaced) +{ + struct drm_property_blob *new_blob = NULL; + + if (blob_id != 0) { + new_blob = drm_property_lookup_blob(dev, blob_id); + if (new_blob == NULL) + return -EINVAL; + + if (expected_size > 0 && + new_blob->length != expected_size) { + drm_property_blob_put(new_blob); + return -EINVAL; + } + if (expected_elem_size > 0 && + new_blob->length % expected_elem_size != 0) { + drm_property_blob_put(new_blob); + return -EINVAL; + } + } + + *replaced |= drm_property_replace_blob(blob, new_blob); + drm_property_blob_put(new_blob); + + return 0; +} + +int dm_drm_plane_set_property(struct drm_plane *plane, + struct drm_plane_state *state, + struct drm_property *property, + uint64_t val) +{ + struct amdgpu_device *adev = drm_to_adev(plane->dev); + struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + int ret = 0; + bool replaced; + + if (property == adev->dm.degamma_lut_property) { + ret = atomic_replace_property_blob_from_id(adev_to_drm(adev), + &dm_plane_state->degamma_lut, + val, -1, sizeof(struct drm_color_lut), + &replaced); + } else if (property == adev->dm.ctm_property) { + ret = atomic_replace_property_blob_from_id(adev_to_drm(adev), + &dm_plane_state->ctm, + val, + sizeof(struct drm_color_ctm), -1, + &replaced); + } else if (property == adev->dm.sdr_boost_property) { + dm_plane_state->sdr_boost = val; + } else { + return -EINVAL; + } + + return ret; +} + +int dm_drm_plane_get_property(struct drm_plane *plane, + const struct drm_plane_state *state, + struct drm_property *property, + uint64_t *val) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(state); + struct amdgpu_device *adev = drm_to_adev(plane->dev); + + if (property == adev->dm.degamma_lut_property) { + *val = (dm_plane_state->degamma_lut) ? + dm_plane_state->degamma_lut->base.id : 0; + } else if (property == adev->dm.ctm_property) { + *val = (dm_plane_state->ctm) ? 
dm_plane_state->ctm->base.id : 0; + } else if (property == adev->dm.sdr_boost_property) { + *val = dm_plane_state->sdr_boost; + } else { + return -EINVAL; + } + + return 0; +} +#endif + +static const struct drm_plane_funcs dm_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = drm_primary_helper_destroy, + .reset = dm_drm_plane_reset, + .atomic_duplicate_state = dm_drm_plane_duplicate_state, + .atomic_destroy_state = dm_drm_plane_destroy_state, + .format_mod_supported = dm_plane_format_mod_supported, +#ifdef CONFIG_DRM_AMD_DC_HDR + .atomic_set_property = dm_drm_plane_set_property, + .atomic_get_property = dm_drm_plane_get_property, +#endif +}; + +int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, + struct drm_plane *plane, + unsigned long possible_crtcs, + const struct dc_plane_cap *plane_cap) +{ + uint32_t formats[32]; + int num_formats; + int res = -EPERM; + unsigned int supported_rotations; + uint64_t *modifiers = NULL; + + num_formats = get_plane_formats(plane, plane_cap, formats, + ARRAY_SIZE(formats)); + + res = get_plane_modifiers(dm->adev, plane->type, &modifiers); + if (res) + return res; + + res = drm_universal_plane_init(adev_to_drm(dm->adev), plane, possible_crtcs, + &dm_plane_funcs, formats, num_formats, + modifiers, plane->type, NULL); + kfree(modifiers); + if (res) + return res; + + if (plane->type == DRM_PLANE_TYPE_OVERLAY && + plane_cap && plane_cap->per_pixel_alpha) { + unsigned int blend_caps = BIT(DRM_MODE_BLEND_PIXEL_NONE) | + BIT(DRM_MODE_BLEND_PREMULTI) | + BIT(DRM_MODE_BLEND_COVERAGE); + + drm_plane_create_alpha_property(plane); + drm_plane_create_blend_mode_property(plane, blend_caps); + } + + if (plane->type == DRM_PLANE_TYPE_PRIMARY && + plane_cap && + (plane_cap->pixel_format_support.nv12 || + plane_cap->pixel_format_support.p010)) { + /* This only affects YUV formats. */ + drm_plane_create_color_properties( + plane, + BIT(DRM_COLOR_YCBCR_BT601) | + BIT(DRM_COLOR_YCBCR_BT709) | + BIT(DRM_COLOR_YCBCR_BT2020), + BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) | + BIT(DRM_COLOR_YCBCR_FULL_RANGE), + DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); + } + + supported_rotations = + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; + + if (dm->adev->asic_type >= CHIP_BONAIRE && + plane->type != DRM_PLANE_TYPE_CURSOR) + drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0, + supported_rotations); + + drm_plane_helper_add(plane, &dm_plane_helper_funcs); + +#ifdef CONFIG_DRM_AMD_DC_HDR + attach_color_mgmt_properties(dm, plane); +#endif + /* Create (reset) the plane state */ + if (plane->funcs->reset) + plane->funcs->reset(plane); + + return 0; +} + diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h new file mode 100644 index 0000000000000..95168c2cfa6fa --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_DM_PLANE_H__
+#define __AMDGPU_DM_PLANE_H__
+
+#include "dc.h"
+
+void handle_cursor_update(struct drm_plane *plane,
+			  struct drm_plane_state *old_plane_state);
+
+int fill_dc_scaling_info(struct amdgpu_device *adev,
+			 const struct drm_plane_state *state,
+			 struct dc_scaling_info *scaling_info);
+
+void get_min_max_dc_plane_scaling(struct drm_device *dev,
+				  struct drm_framebuffer *fb,
+				  int *min_downscale, int *max_upscale);
+
+int dm_plane_helper_check_state(struct drm_plane_state *state,
+				struct drm_crtc_state *new_crtc_state);
+
+bool modifier_has_dcc(uint64_t modifier);
+
+unsigned int modifier_gfx9_swizzle_mode(uint64_t modifier);
+
+int fill_plane_buffer_attributes(struct amdgpu_device *adev,
+				 const struct amdgpu_framebuffer *afb,
+				 const enum surface_pixel_format format,
+				 const enum dc_rotation_angle rotation,
+				 const uint64_t tiling_flags,
+				 union dc_tiling_info *tiling_info,
+				 struct plane_size *plane_size,
+				 struct dc_plane_dcc_param *dcc,
+				 struct dc_plane_address *address,
+				 bool tmz_surface,
+				 bool force_disable_dcc);
+
+int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
+			 struct drm_plane *plane,
+			 unsigned long possible_crtcs,
+			 const struct dc_plane_cap *plane_cap);
+
+const struct drm_format_info *amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd);
+
+void fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
+				    bool *per_pixel_alpha, bool *pre_multiplied_alpha,
+				    bool *global_alpha, int *global_alpha_value);
+
+#endif

From 473683a03495a5ed9c75a02c7497f32d6cf92c7a Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Wed, 20 Jul 2022 15:43:21 -0400
Subject: [PATCH 037/134] drm/amd/display: Create a file dedicated for CRTC

[Why]
The amdgpu_dm file contains most of the code that works as an interface
between the DRM API and DC. As a result, the file has become very large,
since it comprises multiple abstractions such as CRTC manipulation.

[How]
This commit extracts the CRTC code into its own file, named
amdgpu_dm_crtc. The change does not alter anything inside the functions;
the only exception is converting some static functions to global
functions.
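As a concrete illustration of that conversion (a sketch of the pattern,
not the actual hunks below): a handler such as dm_crtc_handle_vblank()
simply loses its static qualifier at the definition and gains a
declaration in the new header.

    /* amdgpu_dm_crtc.h */
    void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc);

    /* amdgpu_dm_crtc.c -- no longer static */
    void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc)
    {
            /* body moved verbatim from amdgpu_dm.c */
    }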
v2: fix ifdef merge mix up (Alex) Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Acked-by: Alan Liu Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/Makefile | 1 + .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 434 +--------------- .../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 463 ++++++++++++++++++ .../amd/display/amdgpu_dm/amdgpu_dm_crtc.h | 51 ++ 4 files changed, 516 insertions(+), 433 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c create mode 100644 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile index ec559ea902a39..90fb0f3cdb6fd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/Makefile @@ -28,6 +28,7 @@ AMDGPUDM = \ amdgpu_dm.o \ amdgpu_dm_plane.o \ + amdgpu_dm_crtc.o \ amdgpu_dm_irq.o \ amdgpu_dm_mst_types.o \ amdgpu_dm_color.o diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8a1821a2551ca..4e51b06fcdd9b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -47,6 +47,7 @@ #include "atom.h" #include "amdgpu_dm.h" #include "amdgpu_dm_plane.h" +#include "amdgpu_dm_crtc.h" #ifdef CONFIG_DRM_AMD_DC_HDCP #include "amdgpu_dm_hdcp.h" #include @@ -207,9 +208,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev); /* removes and deallocates the drm structures, created by the above function */ static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm); -static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - uint32_t link_index); static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm, struct amdgpu_dm_connector *amdgpu_dm_connector, uint32_t link_index, @@ -338,20 +336,6 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev, return NULL; } -static inline bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc) -{ - return acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE || - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_FIXED; -} - -static inline bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state) -{ - return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || - dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; -} - static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state, struct dm_crtc_state *new_state) { @@ -467,26 +451,6 @@ static void dm_pflip_high_irq(void *interrupt_params) vrr_active, (int) !e); } -static void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) -{ - struct drm_crtc *crtc = &acrtc->base; - struct drm_device *dev = crtc->dev; - unsigned long flags; - - drm_crtc_handle_vblank(crtc); - - spin_lock_irqsave(&dev->event_lock, flags); - - /* Send completion event for cursor-only commits */ - if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { - drm_crtc_send_vblank_event(crtc, acrtc->event); - drm_crtc_vblank_put(crtc); - acrtc->event = NULL; - } - - spin_unlock_irqrestore(&dev->event_lock, flags); -} - static void dm_vupdate_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; @@ -1264,52 +1228,6 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ } -static void vblank_control_worker(struct work_struct *work) -{ - 
struct vblank_control_work *vblank_work = - container_of(work, struct vblank_control_work, work); - struct amdgpu_display_manager *dm = vblank_work->dm; - - mutex_lock(&dm->dc_lock); - - if (vblank_work->enable) - dm->active_vblank_irq_count++; - else if(dm->active_vblank_irq_count) - dm->active_vblank_irq_count--; - - dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0); - - DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); - - /* - * Control PSR based on vblank requirements from OS - * - * If panel supports PSR SU, there's no need to disable PSR when OS is - * submitting fast atomic commits (we infer this by whether the OS - * requests vblank events). Fast atomic commits will simply trigger a - * full-frame-update (FFU); a specific case of selective-update (SU) - * where the SU region is the full hactive*vactive region. See - * fill_dc_dirty_rects(). - */ - if (vblank_work->stream && vblank_work->stream->link) { - if (vblank_work->enable) { - if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && - vblank_work->stream->link->psr_settings.psr_allow_active) - amdgpu_dm_psr_disable(vblank_work->stream); - } else if (vblank_work->stream->link->psr_settings.psr_feature_enabled && - !vblank_work->stream->link->psr_settings.psr_allow_active && - vblank_work->acrtc->dm_irq_params.allow_psr_entry) { - amdgpu_dm_psr_enable(vblank_work->stream); - } - } - - mutex_unlock(&dm->dc_lock); - - dc_stream_release(vblank_work->stream); - - kfree(vblank_work); -} - static void dm_handle_hpd_rx_offload_work(struct work_struct *work) { struct hpd_rx_irq_offload_work *offload_work; @@ -2385,9 +2303,6 @@ static int dm_hw_fini(void *handle) } -static int dm_enable_vblank(struct drm_crtc *crtc); -static void dm_disable_vblank(struct drm_crtc *crtc); - static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, struct dc_state *state, bool enable) { @@ -4679,13 +4594,6 @@ static int dm_early_init(void *handle) return 0; } -static bool modeset_required(struct drm_crtc_state *crtc_state, - struct dc_stream_state *new_stream, - struct dc_stream_state *old_stream) -{ - return crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state); -} - static bool modereset_required(struct drm_crtc_state *crtc_state) { return !crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state); @@ -5916,182 +5824,6 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, return stream; } -static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc) -{ - drm_crtc_cleanup(crtc); - kfree(crtc); -} - -static void dm_crtc_destroy_state(struct drm_crtc *crtc, - struct drm_crtc_state *state) -{ - struct dm_crtc_state *cur = to_dm_crtc_state(state); - - /* TODO Destroy dc_stream objects are stream object is flattened */ - if (cur->stream) - dc_stream_release(cur->stream); - - - __drm_atomic_helper_crtc_destroy_state(state); - - - kfree(state); -} - -static void dm_crtc_reset_state(struct drm_crtc *crtc) -{ - struct dm_crtc_state *state; - - if (crtc->state) - dm_crtc_destroy_state(crtc, crtc->state); - - state = kzalloc(sizeof(*state), GFP_KERNEL); - if (WARN_ON(!state)) - return; - - __drm_atomic_helper_crtc_reset(crtc, &state->base); -} - -static struct drm_crtc_state * -dm_crtc_duplicate_state(struct drm_crtc *crtc) -{ - struct dm_crtc_state *state, *cur; - - cur = to_dm_crtc_state(crtc->state); - - if (WARN_ON(!crtc->state)) - return NULL; - - state = kzalloc(sizeof(*state), GFP_KERNEL); - if (!state) - return NULL; - - 
__drm_atomic_helper_crtc_duplicate_state(crtc, &state->base); - - if (cur->stream) { - state->stream = cur->stream; - dc_stream_retain(state->stream); - } - - state->active_planes = cur->active_planes; - state->vrr_infopacket = cur->vrr_infopacket; - state->abm_level = cur->abm_level; - state->vrr_supported = cur->vrr_supported; - state->freesync_config = cur->freesync_config; - state->cm_has_degamma = cur->cm_has_degamma; - state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; - state->mpo_requested = cur->mpo_requested; - /* TODO Duplicate dc_stream after objects are stream object is flattened */ - - return &state->base; -} - -#ifdef CONFIG_DEBUG_FS -static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) -{ - crtc_debugfs_init(crtc); - - return 0; -} -#endif - -static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) -{ - enum dc_irq_source irq_source; - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - int rc; - - irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst; - - rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; - - DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n", - acrtc->crtc_id, enable ? "en" : "dis", rc); - return rc; -} - -static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) -{ - enum dc_irq_source irq_source; - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); - struct amdgpu_display_manager *dm = &adev->dm; - struct vblank_control_work *work; - int rc = 0; - - if (enable) { - /* vblank irq on -> Only need vupdate irq in vrr mode */ - if (amdgpu_dm_vrr_active(acrtc_state)) - rc = dm_set_vupdate_irq(crtc, true); - } else { - /* vblank irq off -> vupdate irq off */ - rc = dm_set_vupdate_irq(crtc, false); - } - - if (rc) - return rc; - - irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; - - if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) - return -EBUSY; - - if (amdgpu_in_reset(adev)) - return 0; - - if (dm->vblank_control_workqueue) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) - return -ENOMEM; - - INIT_WORK(&work->work, vblank_control_worker); - work->dm = dm; - work->acrtc = acrtc; - work->enable = enable; - - if (acrtc_state->stream) { - dc_stream_retain(acrtc_state->stream); - work->stream = acrtc_state->stream; - } - - queue_work(dm->vblank_control_workqueue, &work->work); - } - - return 0; -} - -static int dm_enable_vblank(struct drm_crtc *crtc) -{ - return dm_set_vblank(crtc, true); -} - -static void dm_disable_vblank(struct drm_crtc *crtc) -{ - dm_set_vblank(crtc, false); -} - -/* Implemented only the options currently available for the driver */ -static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { - .reset = dm_crtc_reset_state, - .destroy = amdgpu_dm_crtc_destroy, - .set_config = drm_atomic_helper_set_config, - .page_flip = drm_atomic_helper_page_flip, - .atomic_duplicate_state = dm_crtc_duplicate_state, - .atomic_destroy_state = dm_crtc_destroy_state, - .set_crc_source = amdgpu_dm_crtc_set_crc_source, - .verify_crc_source = amdgpu_dm_crtc_verify_crc_source, - .get_crc_sources = amdgpu_dm_crtc_get_crc_sources, - .get_vblank_counter = amdgpu_get_vblank_counter_kms, - .enable_vblank = dm_enable_vblank, - .disable_vblank = dm_disable_vblank, - .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, -#if defined(CONFIG_DEBUG_FS) - .late_register = amdgpu_dm_crtc_late_register, -#endif -}; - 
static enum drm_connector_status amdgpu_dm_connector_detect(struct drm_connector *connector, bool force) { @@ -6627,113 +6359,6 @@ amdgpu_dm_connector_helper_funcs = { .atomic_check = amdgpu_dm_connector_atomic_check, }; -static void dm_crtc_helper_disable(struct drm_crtc *crtc) -{ -} - -static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state) -{ - struct drm_atomic_state *state = new_crtc_state->state; - struct drm_plane *plane; - int num_active = 0; - - drm_for_each_plane_mask(plane, state->dev, new_crtc_state->plane_mask) { - struct drm_plane_state *new_plane_state; - - /* Cursor planes are "fake". */ - if (plane->type == DRM_PLANE_TYPE_CURSOR) - continue; - - new_plane_state = drm_atomic_get_new_plane_state(state, plane); - - if (!new_plane_state) { - /* - * The plane is enable on the CRTC and hasn't changed - * state. This means that it previously passed - * validation and is therefore enabled. - */ - num_active += 1; - continue; - } - - /* We need a framebuffer to be considered enabled. */ - num_active += (new_plane_state->fb != NULL); - } - - return num_active; -} - -static void dm_update_crtc_active_planes(struct drm_crtc *crtc, - struct drm_crtc_state *new_crtc_state) -{ - struct dm_crtc_state *dm_new_crtc_state = - to_dm_crtc_state(new_crtc_state); - - dm_new_crtc_state->active_planes = 0; - - if (!dm_new_crtc_state->stream) - return; - - dm_new_crtc_state->active_planes = - count_crtc_active_planes(new_crtc_state); -} - -static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, - struct drm_atomic_state *state) -{ - struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, - crtc); - struct amdgpu_device *adev = drm_to_adev(crtc->dev); - struct dc *dc = adev->dm.dc; - struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); - int ret = -EINVAL; - - trace_amdgpu_dm_crtc_atomic_check(crtc_state); - - dm_update_crtc_active_planes(crtc, crtc_state); - - if (WARN_ON(unlikely(!dm_crtc_state->stream && - modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) { - return ret; - } - - /* - * We require the primary plane to be enabled whenever the CRTC is, otherwise - * drm_mode_cursor_universal may end up trying to enable the cursor plane while all other - * planes are disabled, which is not supported by the hardware. And there is legacy - * userspace which stops using the HW cursor altogether in response to the resulting EINVAL. 
- */ - if (crtc_state->enable && - !(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) { - DRM_DEBUG_ATOMIC("Can't enable a CRTC without enabling the primary plane\n"); - return -EINVAL; - } - - /* In some use cases, like reset, no stream is attached */ - if (!dm_crtc_state->stream) - return 0; - - if (dc_validate_stream(dc, dm_crtc_state->stream) == DC_OK) - return 0; - - DRM_DEBUG_ATOMIC("Failed DC stream validation\n"); - return ret; -} - -static bool dm_crtc_helper_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - return true; -} - -static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = { - .disable = dm_crtc_helper_disable, - .atomic_check = dm_crtc_helper_atomic_check, - .mode_fixup = dm_crtc_helper_mode_fixup, - .get_scanout_position = amdgpu_crtc_get_scanout_position, -}; - static void dm_encoder_helper_disable(struct drm_encoder *encoder) { @@ -6891,63 +6516,6 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, } #endif -static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, - struct drm_plane *plane, - uint32_t crtc_index) -{ - struct amdgpu_crtc *acrtc = NULL; - struct drm_plane *cursor_plane; - - int res = -ENOMEM; - - cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL); - if (!cursor_plane) - goto fail; - - cursor_plane->type = DRM_PLANE_TYPE_CURSOR; - res = amdgpu_dm_plane_init(dm, cursor_plane, 0, NULL); - - acrtc = kzalloc(sizeof(struct amdgpu_crtc), GFP_KERNEL); - if (!acrtc) - goto fail; - - res = drm_crtc_init_with_planes( - dm->ddev, - &acrtc->base, - plane, - cursor_plane, - &amdgpu_dm_crtc_funcs, NULL); - - if (res) - goto fail; - - drm_crtc_helper_add(&acrtc->base, &amdgpu_dm_crtc_helper_funcs); - - /* Create (reset) the plane state */ - if (acrtc->base.funcs->reset) - acrtc->base.funcs->reset(&acrtc->base); - - acrtc->max_cursor_width = dm->adev->dm.dc->caps.max_cursor_size; - acrtc->max_cursor_height = dm->adev->dm.dc->caps.max_cursor_size; - - acrtc->crtc_id = crtc_index; - acrtc->base.enabled = false; - acrtc->otg_inst = -1; - - dm->adev->mode_info.crtcs[crtc_index] = acrtc; - drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, - true, MAX_COLOR_LUT_ENTRIES); - drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); - - return 0; - -fail: - kfree(acrtc); - kfree(cursor_plane); - return res; -} - - static int to_drm_connector_type(enum signal_type st) { switch (st) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c new file mode 100644 index 0000000000000..828189cb34417 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#include +#include + +#include "dc.h" +#include "amdgpu.h" +#include "amdgpu_dm_psr.h" +#include "amdgpu_dm_crtc.h" +#include "amdgpu_dm_plane.h" +#include "amdgpu_dm_trace.h" +#include "amdgpu_dm_debugfs.h" + +void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) +{ + struct drm_crtc *crtc = &acrtc->base; + struct drm_device *dev = crtc->dev; + unsigned long flags; + + drm_crtc_handle_vblank(crtc); + + spin_lock_irqsave(&dev->event_lock, flags); + + /* Send completion event for cursor-only commits */ + if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { + drm_crtc_send_vblank_event(crtc, acrtc->event); + drm_crtc_vblank_put(crtc); + acrtc->event = NULL; + } + + spin_unlock_irqrestore(&dev->event_lock, flags); +} + +bool modeset_required(struct drm_crtc_state *crtc_state, + struct dc_stream_state *new_stream, + struct dc_stream_state *old_stream) +{ + return crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state); +} + +bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc) + +{ + return acrtc->dm_irq_params.freesync_config.state == + VRR_STATE_ACTIVE_VARIABLE || + acrtc->dm_irq_params.freesync_config.state == + VRR_STATE_ACTIVE_FIXED; +} + +int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) +{ + enum dc_irq_source irq_source; + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + int rc; + + irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst; + + rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; + + DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n", + acrtc->crtc_id, enable ? "en" : "dis", rc); + return rc; +} + +bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state) +{ + return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE || + dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED; +} + +static void vblank_control_worker(struct work_struct *work) +{ + struct vblank_control_work *vblank_work = + container_of(work, struct vblank_control_work, work); + struct amdgpu_display_manager *dm = vblank_work->dm; + + mutex_lock(&dm->dc_lock); + + if (vblank_work->enable) + dm->active_vblank_irq_count++; + else if (dm->active_vblank_irq_count) + dm->active_vblank_irq_count--; + + dc_allow_idle_optimizations( + dm->dc, dm->active_vblank_irq_count == 0 ? true : false); + + DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); + + /* + * Control PSR based on vblank requirements from OS + * + * If panel supports PSR SU, there's no need to disable PSR when OS is + * submitting fast atomic commits (we infer this by whether the OS + * requests vblank events). Fast atomic commits will simply trigger a + * full-frame-update (FFU); a specific case of selective-update (SU) + * where the SU region is the full hactive*vactive region. See + * fill_dc_dirty_rects(). 
+ */ + if (vblank_work->stream && vblank_work->stream->link) { + if (vblank_work->enable) { + if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 && + vblank_work->stream->link->psr_settings.psr_allow_active) + amdgpu_dm_psr_disable(vblank_work->stream); + } else if (vblank_work->stream->link->psr_settings.psr_feature_enabled && + !vblank_work->stream->link->psr_settings.psr_allow_active && + vblank_work->acrtc->dm_irq_params.allow_psr_entry) { + amdgpu_dm_psr_enable(vblank_work->stream); + } + } + + mutex_unlock(&dm->dc_lock); + + dc_stream_release(vblank_work->stream); + + kfree(vblank_work); +} + +static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) +{ + enum dc_irq_source irq_source; + struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); + struct amdgpu_display_manager *dm = &adev->dm; + struct vblank_control_work *work; + int rc = 0; + + if (enable) { + /* vblank irq on -> Only need vupdate irq in vrr mode */ + if (amdgpu_dm_vrr_active(acrtc_state)) + rc = dm_set_vupdate_irq(crtc, true); + } else { + /* vblank irq off -> vupdate irq off */ + rc = dm_set_vupdate_irq(crtc, false); + } + + if (rc) + return rc; + + irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; + + if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) + return -EBUSY; + + if (amdgpu_in_reset(adev)) + return 0; + + if (dm->vblank_control_workqueue) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + + INIT_WORK(&work->work, vblank_control_worker); + work->dm = dm; + work->acrtc = acrtc; + work->enable = enable; + + if (acrtc_state->stream) { + dc_stream_retain(acrtc_state->stream); + work->stream = acrtc_state->stream; + } + + queue_work(dm->vblank_control_workqueue, &work->work); + } + + return 0; +} + +int dm_enable_vblank(struct drm_crtc *crtc) +{ + return dm_set_vblank(crtc, true); +} + +void dm_disable_vblank(struct drm_crtc *crtc) +{ + dm_set_vblank(crtc, false); +} + +static void dm_crtc_destroy_state(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + struct dm_crtc_state *cur = to_dm_crtc_state(state); + + /* TODO Destroy dc_stream objects are stream object is flattened */ + if (cur->stream) + dc_stream_release(cur->stream); + + + __drm_atomic_helper_crtc_destroy_state(state); + + + kfree(state); +} + +static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc) +{ + struct dm_crtc_state *state, *cur; + + cur = to_dm_crtc_state(crtc->state); + + if (WARN_ON(!crtc->state)) + return NULL; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_crtc_duplicate_state(crtc, &state->base); + + if (cur->stream) { + state->stream = cur->stream; + dc_stream_retain(state->stream); + } + + state->active_planes = cur->active_planes; + state->vrr_infopacket = cur->vrr_infopacket; + state->abm_level = cur->abm_level; + state->vrr_supported = cur->vrr_supported; + state->freesync_config = cur->freesync_config; + state->cm_has_degamma = cur->cm_has_degamma; + state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; + state->mpo_requested = cur->mpo_requested; + /* TODO Duplicate dc_stream after objects are stream object is flattened */ + + return &state->base; +} + +static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc) +{ + drm_crtc_cleanup(crtc); + kfree(crtc); +} + +static void dm_crtc_reset_state(struct drm_crtc *crtc) +{ + struct dm_crtc_state *state; + + if 
(crtc->state) + dm_crtc_destroy_state(crtc, crtc->state); + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (WARN_ON(!state)) + return; + + __drm_atomic_helper_crtc_reset(crtc, &state->base); +} + +#ifdef CONFIG_DEBUG_FS +static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc) +{ + crtc_debugfs_init(crtc); + + return 0; +} +#endif + +/* Implemented only the options currently available for the driver */ +static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { + .reset = dm_crtc_reset_state, + .destroy = amdgpu_dm_crtc_destroy, + .set_config = drm_atomic_helper_set_config, + .page_flip = drm_atomic_helper_page_flip, + .atomic_duplicate_state = dm_crtc_duplicate_state, + .atomic_destroy_state = dm_crtc_destroy_state, + .set_crc_source = amdgpu_dm_crtc_set_crc_source, + .verify_crc_source = amdgpu_dm_crtc_verify_crc_source, + .get_crc_sources = amdgpu_dm_crtc_get_crc_sources, + .get_vblank_counter = amdgpu_get_vblank_counter_kms, + .enable_vblank = dm_enable_vblank, + .disable_vblank = dm_disable_vblank, + .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, +#if defined(CONFIG_DEBUG_FS) + .late_register = amdgpu_dm_crtc_late_register, +#endif +}; + +static void dm_crtc_helper_disable(struct drm_crtc *crtc) +{ +} + +static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state) +{ + struct drm_atomic_state *state = new_crtc_state->state; + struct drm_plane *plane; + int num_active = 0; + + drm_for_each_plane_mask(plane, state->dev, new_crtc_state->plane_mask) { + struct drm_plane_state *new_plane_state; + + /* Cursor planes are "fake". */ + if (plane->type == DRM_PLANE_TYPE_CURSOR) + continue; + + new_plane_state = drm_atomic_get_new_plane_state(state, plane); + + if (!new_plane_state) { + /* + * The plane is enable on the CRTC and hasn't changed + * state. This means that it previously passed + * validation and is therefore enabled. + */ + num_active += 1; + continue; + } + + /* We need a framebuffer to be considered enabled. */ + num_active += (new_plane_state->fb != NULL); + } + + return num_active; +} + +static void dm_update_crtc_active_planes(struct drm_crtc *crtc, + struct drm_crtc_state *new_crtc_state) +{ + struct dm_crtc_state *dm_new_crtc_state = + to_dm_crtc_state(new_crtc_state); + + dm_new_crtc_state->active_planes = 0; + + if (!dm_new_crtc_state->stream) + return; + + dm_new_crtc_state->active_planes = + count_crtc_active_planes(new_crtc_state); +} + +static bool dm_crtc_helper_mode_fixup(struct drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + return true; +} + +static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, + crtc); + struct amdgpu_device *adev = drm_to_adev(crtc->dev); + struct dc *dc = adev->dm.dc; + struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state); + int ret = -EINVAL; + + trace_amdgpu_dm_crtc_atomic_check(crtc_state); + + dm_update_crtc_active_planes(crtc, crtc_state); + + if (WARN_ON(unlikely(!dm_crtc_state->stream && + modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) { + return ret; + } + + /* + * We require the primary plane to be enabled whenever the CRTC is, otherwise + * drm_mode_cursor_universal may end up trying to enable the cursor plane while all other + * planes are disabled, which is not supported by the hardware. 
And there is legacy + * userspace which stops using the HW cursor altogether in response to the resulting EINVAL. + */ + if (crtc_state->enable && + !(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) { + DRM_DEBUG_ATOMIC("Can't enable a CRTC without enabling the primary plane\n"); + return -EINVAL; + } + + /* In some use cases, like reset, no stream is attached */ + if (!dm_crtc_state->stream) + return 0; + + if (dc_validate_stream(dc, dm_crtc_state->stream) == DC_OK) + return 0; + + DRM_DEBUG_ATOMIC("Failed DC stream validation\n"); + return ret; +} + +static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = { + .disable = dm_crtc_helper_disable, + .atomic_check = dm_crtc_helper_atomic_check, + .mode_fixup = dm_crtc_helper_mode_fixup, + .get_scanout_position = amdgpu_crtc_get_scanout_position, +}; + +int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, + struct drm_plane *plane, + uint32_t crtc_index) +{ + struct amdgpu_crtc *acrtc = NULL; + struct drm_plane *cursor_plane; + + int res = -ENOMEM; + + cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL); + if (!cursor_plane) + goto fail; + + cursor_plane->type = DRM_PLANE_TYPE_CURSOR; + res = amdgpu_dm_plane_init(dm, cursor_plane, 0, NULL); + + acrtc = kzalloc(sizeof(struct amdgpu_crtc), GFP_KERNEL); + if (!acrtc) + goto fail; + + res = drm_crtc_init_with_planes( + dm->ddev, + &acrtc->base, + plane, + cursor_plane, + &amdgpu_dm_crtc_funcs, NULL); + + if (res) + goto fail; + + drm_crtc_helper_add(&acrtc->base, &amdgpu_dm_crtc_helper_funcs); + + /* Create (reset) the plane state */ + if (acrtc->base.funcs->reset) + acrtc->base.funcs->reset(&acrtc->base); + + acrtc->max_cursor_width = dm->adev->dm.dc->caps.max_cursor_size; + acrtc->max_cursor_height = dm->adev->dm.dc->caps.max_cursor_size; + + acrtc->crtc_id = crtc_index; + acrtc->base.enabled = false; + acrtc->otg_inst = -1; + + dm->adev->mode_info.crtcs[crtc_index] = acrtc; + drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, + true, MAX_COLOR_LUT_ENTRIES); + drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); + + return 0; + +fail: + kfree(acrtc); + kfree(cursor_plane); + return res; +} + diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h new file mode 100644 index 0000000000000..1ac8692354cf0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_DM_CRTC_H__
+#define __AMDGPU_DM_CRTC_H__
+
+void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc);
+
+bool modeset_required(struct drm_crtc_state *crtc_state,
+		      struct dc_stream_state *new_stream,
+		      struct dc_stream_state *old_stream);
+
+int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable);
+
+bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc);
+
+bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state);
+
+int dm_enable_vblank(struct drm_crtc *crtc);
+
+void dm_disable_vblank(struct drm_crtc *crtc);
+
+int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
+			struct drm_plane *plane,
+			uint32_t link_index);
+
+#endif

From e6c64182ed946f1cb65799de9ee75f8bcc42b2c6 Mon Sep 17 00:00:00 2001
From: Wenjing Liu
Date: Wed, 29 Jun 2022 14:55:43 -0400
Subject: [PATCH 038/134] drm/amd/display: remove number of DSC slices
 override in DML

[why]
The number of DSC slices is an input to DML that depends heavily on
display-specific capabilities; it isn't something DML can decide on its
own. DML has to use the original number of DSC slices passed in during
validation, without modification. Otherwise the computed DSC delay will
not reflect the current configuration and will therefore cause
validation failures.

[how]
Remove the DML override for the number of DSC slices parameter.

Tested-by: Daniel Wheeler
Reviewed-by: Alvin Lee
Acked-by: Alan Liu
Signed-off-by: Wenjing Liu
Signed-off-by: Alex Deucher
---
 .../dc/dml/dcn32/display_mode_vba_32.c        | 20 -------------------
 1 file changed, 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 349e36ae93338..9944f58f0db99 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -1897,26 +1897,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 			v->MaximumSwathWidthInLineBufferChroma);
 	}
 
-	/*Number Of DSC Slices*/
-	for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
-		if (mode_lib->vba.BlendingAndTiming[k] == k) {
-			if (mode_lib->vba.PixelClockBackEnd[k] > 4800) {
-				mode_lib->vba.NumberOfDSCSlices[k] = dml_ceil(mode_lib->vba.PixelClockBackEnd[k] / 600,
-						4);
-			} else if (mode_lib->vba.PixelClockBackEnd[k] > 2400) {
-				mode_lib->vba.NumberOfDSCSlices[k] = 8;
-			} else if (mode_lib->vba.PixelClockBackEnd[k] > 1200) {
-				mode_lib->vba.NumberOfDSCSlices[k] = 4;
-			} else if (mode_lib->vba.PixelClockBackEnd[k] > 340) {
-				mode_lib->vba.NumberOfDSCSlices[k] = 2;
-			} else {
-				mode_lib->vba.NumberOfDSCSlices[k] = 1;
-			}
-		} else {
-			mode_lib->vba.NumberOfDSCSlices[k] = 0;
-		}
-	}
-
 	dml32_CalculateSwathAndDETConfiguration(
 			mode_lib->vba.DETSizeOverride,
 			mode_lib->vba.UsesMALLForPStateChange,

From 074293dd9f61f11898f1f6e01f1560fd4c474025 Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Wed, 13 Jul 2022 13:17:41 -0400
Subject: [PATCH 039/134] drm/amd/display: Fix hard hang if DSC is disabled

We want to calculate the DTB clock values when DSC is enabled; however,
this is not the current behavior implemented in DCN32.
Right now, DML is trying to calculate DSC values even if DSC is disabled; as a result, we can have a hard hang due to wrong clock calculation. This commit fixes this issue by moving the calculation after the DSC check. Tested-by: Daniel Wheeler Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml/dcn32/display_mode_vba_util_32.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 5a701d9df0f75..febaff7d7343c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1686,17 +1686,22 @@ double dml32_RequiredDTBCLK( unsigned int AudioRate, unsigned int AudioLayout) { - double PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); - double HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * - dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); - double HCBlank = 64 + 32 * - dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); - double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; - double HActiveTribyteRate = PixelWordRate * HCActive / HActive; + double PixelWordRate; + double HCActive; + double HCBlank; + double AverageTribyteRate; + double HActiveTribyteRate; if (DSCEnable != true) return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); + PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); + HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * + dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); + HCBlank = 64 + 32 * + dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); + AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; + HActiveTribyteRate = PixelWordRate * HCActive / HActive; return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; } From a983d263186996002ecafc72f9a0dc5a3a2bfd6f Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 16 Jun 2022 10:58:08 -0400 Subject: [PATCH 040/134] drm/amd/display: Don't set dram clock change requirement for SubVP [Description] In general cases we want to keep the dram clock change requirement (we prefer configs that support MCLK switch). Only override to false for SubVP. 
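For clarity, the policy amounts to the following one-line sketch (an illustrative condensation of the diff below, reusing the subvp_in_use flag it introduces; not literal patch content):

    /* Prefer configs that can switch MCLK unless SubVP is in use */
    context->bw_ctx.dml.soc.dram_clock_change_requirement_final = !subvp_in_use;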
Tested-by: Daniel Wheeler Acked-by: Alan Liu Signed-off-by: Alvin Lee Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index f913daabcca5a..92d87745d9339 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2984,6 +2984,7 @@ int dcn32_populate_dml_pipes_from_context( int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *pipe; + bool subvp_in_use = false; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); @@ -3006,6 +3007,7 @@ int dcn32_populate_dml_pipes_from_context( switch (pipe->stream->mall_stream_config.type) { case SUBVP_MAIN: pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport; + subvp_in_use = true; break; case SUBVP_PHANTOM: pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe; @@ -3076,6 +3078,14 @@ int dcn32_populate_dml_pipes_from_context( dcn32_update_det_override_for_mpo(dc, context, pipes); + // In general cases we want to keep the dram clock change requirement + // (prefer configs that support MCLK switch). Only override to false + // for SubVP + if (subvp_in_use) + context->bw_ctx.dml.soc.dram_clock_change_requirement_final = false; + else + context->bw_ctx.dml.soc.dram_clock_change_requirement_final = true; + return pipe_cnt; } From 44b0c964201e1e2fbfcdab4472f9260048befbaa Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Tue, 5 Jul 2022 12:22:09 -0400 Subject: [PATCH 041/134] drm/amd/display: Update de-tile override to anticipate pipe splitting [Why] For certain MPO configurations, DML will split a pipe after DET buffer has already been allocated by driver, resulting in allocation of more DET segments than the configurable return buffer has, causing underflow. [How] Determine during DET override calculation whether or not a pipe will be split later on by DML, and distribute DET segments based on expected number of pipes. 
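As a rough sketch, the distribution scheme in the dcn32_determine_det_override() helper added further below behaves as follows; pipes_using_stream and predicted_splits are descriptive placeholders for counts the helper derives from the pipe context, and the 18-segment total comes from that helper:

    stream_segments = 18 / context->stream_count;   /* DET segments granted per stream */
    count = pipes_using_stream + predicted_splits;  /* pipes expected after DML splits */
    pipe_segments = stream_segments / count;        /* share for each expected pipe */
    pipes[i].pipe.src.det_size_override = pipe_segments * DCN3_2_DET_SEG_SIZE;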
Tested-by: Daniel Wheeler Reviewed-by: Dmytro Laktyushkin Acked-by: Alan Liu Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 22 ++-- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 6 +- .../display/dc/dcn32/dcn32_resource_helpers.c | 112 +++++++++--------- 3 files changed, 69 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 92d87745d9339..631876832dfa7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2984,7 +2984,7 @@ int dcn32_populate_dml_pipes_from_context( int i, pipe_cnt; struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *pipe; - bool subvp_in_use = false; + bool subvp_in_use = false, is_pipe_split_expected[MAX_PIPES]; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); @@ -3046,6 +3046,9 @@ int dcn32_populate_dml_pipes_from_context( if (dc->debug.enable_single_display_2to1_odm_policy) pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; } + + is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, pipes[i].pipe, i); + pipe_cnt++; } @@ -3053,8 +3056,7 @@ int dcn32_populate_dml_pipes_from_context( * the DET available for each pipe). Use the DET override input to maintain our driver * policy. */ - switch (pipe_cnt) { - case 1: + if (pipe_cnt == 1 && !is_pipe_split_expected[0]) { pipes[0].pipe.src.det_size_override = DCN3_2_MAX_DET_SIZE; if (pipe->plane_state && !dc->debug.disable_z9_mpc) { if (!is_dual_plane(pipe->plane_state->format)) { @@ -3065,18 +3067,8 @@ int dcn32_populate_dml_pipes_from_context( pipes[0].pipe.src.det_size_override = 320; // 5K or higher } } - break; - case 2: - case 3: - case 4: - // For 2 and 3 pipes, use (MAX_DET_SIZE / pipe_cnt), for 4 pipes use default size for each pipe - for (i = 0; i < pipe_cnt; i++) { - pipes[i].pipe.src.det_size_override = (pipe_cnt < 4) ? (DCN3_2_MAX_DET_SIZE / pipe_cnt) : DCN3_2_DEFAULT_DET_SIZE; - } - break; - } - - dcn32_update_det_override_for_mpo(dc, context, pipes); + } else + dcn32_determine_det_override(context, pipes, is_pipe_split_expected, pipe_cnt); // In general cases we want to keep the dram clock change requirement // (prefer configs that support MCLK switch). 
Only override to false diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index db4546317cb57..10254ab7e9d9f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -100,7 +100,9 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, bool dcn32_subvp_in_use(struct dc *dc, struct dc_state *context); -void dcn32_update_det_override_for_mpo(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes); +bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index); + +void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, + bool *is_pipe_split_expected, int pipe_cnt); #endif /* _DCN32_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index e001f6d1f6c30..a6ef1dba01fe4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -26,6 +26,8 @@ // header file of functions being implemented #include "dcn32_resource.h" #include "dcn20/dcn20_resource.h" +#include "dml/dcn32/display_mode_vba_util_32.h" + /** * ******************************************************************************************** * dcn32_helper_populate_phantom_dlg_params: Get DLG params for phantom pipes and populate pipe_ctx @@ -195,66 +197,68 @@ bool dcn32_subvp_in_use(struct dc *dc, return false; } -/* For MPO we adjust the DET allocation to ensure we have enough DET buffer when an MPO pipe - * is removed. For example for 1 MPO + 1 non-MPO normally we would allocate 6 DET segments - * for each pipe [6, 6, 6]. But when transitioning out of MPO it would change from - * [6, 6, 6] -> [9, 9]. However, if VUPDATE for the non-MPO pipe comes first we would be - * trying to allocate more DET than what's currently available which would result in underflow. - * - * In this case we must ensure there is enough buffer when transitioning in and out of MPO: - * - * 1 MPO (2 plane) + 1 non-MPO case: - * [4, 4, 9]<->[9, 9]: Allocate 4 each for MPO pipes, and maintain 9 for non-MPO pipe - * - * 1 MPO (2 plane) + 2 non-MPO case: - * [3, 3, 5, 5]<->[6, 6, 6] - * - * 1 MPO (3 plane) + 1 non-MPO case: - * [3, 3, 3, 9]<->[4, 4, 9] or [3, 3, 3, 6]<->[9, 9] - * - * For multi-display MPO case all pipes will have 4 segments: - * Removing MPO on one of the displays will result in 3 pipes - * (1 MPO and 1 non-MPO which is covered by single MPO stream case). 
- */ -void dcn32_update_det_override_for_mpo(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes) +bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index) { - uint8_t i, mpo_stream_index, pipe_cnt; - uint8_t mpo_stream_count = 0; - uint8_t mpo_planes = 0; // Only used in single display MPO case - unsigned int j; - struct resource_context *res_ctx = &context->res_ctx; + double pscl_throughput, pscl_throughput_chroma, dpp_clk_single_dpp, clock, + clk_frequency = 0.0, vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz; - for (i = 0; i < context->stream_count; i++) { - if (context->stream_status[i].plane_count > 1) { - mpo_stream_index = i; - mpo_stream_count++; - mpo_planes = context->stream_status[i].plane_count; - } - } + dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio, + pipe.scale_ratio_depth.hscl_ratio_c, + pipe.scale_ratio_depth.vscl_ratio, + pipe.scale_ratio_depth.vscl_ratio_c, + context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk, + context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk, + pipe.dest.pixel_rate_mhz, + pipe.src.source_format, + pipe.scale_taps.htaps, + pipe.scale_taps.htaps_c, + pipe.scale_taps.vtaps, + pipe.scale_taps.vtaps_c, - if (mpo_stream_count == 1) { - for (j = 0, pipe_cnt = 0; j < dc->res_pool->pipe_count; j++) { - if (!res_ctx->pipe_ctx[j].stream) - continue; + /* Output */ + &pscl_throughput, &pscl_throughput_chroma, + &dpp_clk_single_dpp); - if (context->res_ctx.pipe_ctx[j].stream == context->streams[mpo_stream_index]) { - // For 3 plane MPO + 1 non-MPO, do [3, 3, 3, 9] - // For 2 plane MPO + 1 non-MPO, do [4, 4, 9] - if (context->stream_count - mpo_stream_count == 1) - pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * (mpo_planes == 2 ? 
4 : 3); - else if (context->stream_count - mpo_stream_count == 2) - pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * 3; + clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100); + + if (clock > 0) + clk_frequency = vco_speed * 4.0 / ((int) (vco_speed * 4.0 / clock)); + + if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz) + return true; + else + return false; +} + +void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, + bool *is_pipe_split_expected, int pipe_cnt) +{ + int i, j, count, stream_segments, pipe_segments[MAX_PIPES]; + + if (context->stream_count > 0) { + stream_segments = 18 / context->stream_count; + for (i = 0, count = 0; i < context->stream_count; i++) { + for (j = 0; j < pipe_cnt; j++) { + if (context->res_ctx.pipe_ctx[j].stream == context->streams[i]) { + count++; + if (is_pipe_split_expected[j]) + count++; + } + } + pipe_segments[i] = stream_segments / count; + } - } else if (context->res_ctx.pipe_ctx[j].stream && - context->res_ctx.pipe_ctx[j].stream != context->streams[mpo_stream_index]) { - // Update for non-MPO pipes - if (context->stream_count - mpo_stream_count == 1) - pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * 9; - else if (context->stream_count - mpo_stream_count == 2) - pipes[pipe_cnt].pipe.src.det_size_override = DCN3_2_DET_SEG_SIZE * 5; + for (i = 0; i < pipe_cnt; i++) { + pipes[i].pipe.src.det_size_override = 0; + for (j = 0; j < context->stream_count; j++) { + if (context->res_ctx.pipe_ctx[i].stream == context->streams[j]) { + pipes[i].pipe.src.det_size_override = pipe_segments[j] * DCN3_2_DET_SEG_SIZE; + break; + } } - pipe_cnt++; } + } else { + for (i = 0; i < pipe_cnt; i++) + pipes[i].pipe.src.det_size_override = 4 * DCN3_2_DET_SEG_SIZE; //DCN3_2_DEFAULT_DET_SIZE } } From 557f9100c74cfa033488563240f567466613cefa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Thu, 14 Jul 2022 13:44:58 -0300 Subject: [PATCH 042/134] drm/amd/display: Remove unused clk_src variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the variable clk_src from the function dcn3_get_pix_clk_dividers. This was pointed by clang with the following warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dce/dce_clock_source.c:1279:25: warning: variable 'clk_src' set but not used [-Wunused-but-set-variable] struct dce110_clk_src *clk_src; ^ 1 warning generated. Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 5cc7cc0b2f2d3..d55da1ab1ac2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -1276,9 +1276,7 @@ static uint32_t dcn3_get_pix_clk_dividers( struct pll_settings *pll_settings) { unsigned long long actual_pix_clk_100Hz = pix_clk_params ?
pix_clk_params->requested_pix_clk_100hz : 0; - struct dce110_clk_src *clk_src; - clk_src = TO_DCE110_CLK_SRC(cs); DC_LOGGER_INIT(); if (pix_clk_params == NULL || pll_settings == NULL From d3e19f76121178420c3efc9688adfd5a448996e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Thu, 14 Jul 2022 13:44:59 -0300 Subject: [PATCH 043/134] drm/amd/display: Remove unused dml32_CalculatedoublePipeDPPCLKAndSCLThroughput function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove dml32_CalculatedoublePipeDPPCLKAndSCLThroughput function, which is not used in the codebase. This was pointed by clang with the following warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:393:6: warning: no previous prototype for function 'dml32_CalculatedoublePipeDPPCLKAndSCLThroughput' [-Wmissing-prototypes] void dml32_CalculatedoublePipeDPPCLKAndSCLThroughput( ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:393:1: note: declare 'static' if the function is not intended to be used outside of this translation unit void dml32_CalculatedoublePipeDPPCLKAndSCLThroughput( ^ static 1 warning generated. Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- .../dc/dml/dcn32/display_mode_vba_util_32.c | 54 ------------------- 1 file changed, 54 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index febaff7d7343c..67cbc7923652b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -390,60 +390,6 @@ void dml32_CalculateBytePerPixelAndBlockSizes( #endif } // CalculateBytePerPixelAndBlockSizes -void dml32_CalculatedoublePipeDPPCLKAndSCLThroughput( - double HRatio, - double HRatioChroma, - double VRatio, - double VRatioChroma, - double MaxDCHUBToPSCLThroughput, - double MaxPSCLToLBThroughput, - double PixelClock, - enum source_format_class SourcePixelFormat, - unsigned int HTaps, - unsigned int HTapsChroma, - unsigned int VTaps, - unsigned int VTapsChroma, - - /* output */ - double *PSCL_THROUGHPUT, - double *PSCL_THROUGHPUT_CHROMA, - double *DPPCLKUsingdoubleDPP) -{ - double DPPCLKUsingdoubleDPPLuma; - double DPPCLKUsingdoubleDPPChroma; - - if (HRatio > 1) { - *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / - dml_ceil((double) HTaps / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - - DPPCLKUsingdoubleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / - *PSCL_THROUGHPUT, 1); - - if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingdoubleDPPLuma < 2 * PixelClock) - DPPCLKUsingdoubleDPPLuma = 2 * PixelClock; - - if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && - SourcePixelFormat != dm_rgbe_alpha)) { - *PSCL_THROUGHPUT_CHROMA = 0; - *DPPCLKUsingdoubleDPP = DPPCLKUsingdoubleDPPLuma; - } else { - if (HRatioChroma > 1) { - *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * - HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); - } else { - *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); - } - DPPCLKUsingdoubleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), - HRatioChroma * VRatioChroma / 
*PSCL_THROUGHPUT_CHROMA, 1); - if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingdoubleDPPChroma < 2 * PixelClock) - DPPCLKUsingdoubleDPPChroma = 2 * PixelClock; - *DPPCLKUsingdoubleDPP = dml_max(DPPCLKUsingdoubleDPPLuma, DPPCLKUsingdoubleDPPChroma); - } -} - void dml32_CalculateSwathAndDETConfiguration( unsigned int DETSizeOverride[], enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], From 4f5a17b221cc48d66cce13fe1531ca8fc99078b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Thu, 14 Jul 2022 13:45:00 -0300 Subject: [PATCH 044/134] drm/amd/display: Remove unused NumberOfStates variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused unsigned int NumberOfStates from the file, which was declared but never hooked up. This was pointed by clang with the following warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:33:27: warning: unused variable 'NumberOfStates' [-Wunused-const-variable] static const unsigned int NumberOfStates = DC__VOLTAGE_STATES; ^ 1 warning generated. Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 9944f58f0db99..1efce9f5eae3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -30,8 +30,6 @@ #include "../dml_inline_defs.h" #include "display_mode_vba_util_32.h" -static const unsigned int NumberOfStates = DC__VOLTAGE_STATES; - void dml32_recalculate(struct display_mode_lib *mode_lib); static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation( struct display_mode_lib *mode_lib); From 1cccdfe21eb6b8dd4371378b4b61223eb37f1ef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Thu, 14 Jul 2022 13:45:01 -0300 Subject: [PATCH 045/134] drm/amd/display: Remove unused variables from dml_rq_dlg_get_dlg_params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the variables dispclk_delay_subtotal and dppclk_delay_subtotal from the function dml_rq_dlg_get_dlg_params. This was pointed by clang with the following warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_rq_dlg_calc_31.c:920:15: warning: variable 'dispclk_delay_subtotal' set but not used [-Wunused-but-set-variable] unsigned int dispclk_delay_subtotal; ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_rq_dlg_calc_31.c:919:15: warning: variable 'dppclk_delay_subtotal' set but not used [-Wunused-but-set-variable] unsigned int dppclk_delay_subtotal; ^ 2 warnings generated. 
Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- .../dc/dml/dcn31/display_rq_dlg_calc_31.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index c94cf6e01e250..66b82e4f05c6e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -866,7 +866,6 @@ static void dml_rq_dlg_get_dlg_params( { const display_pipe_source_params_st *src = &e2e_pipe_param[pipe_idx].pipe.src; const display_pipe_dest_params_st *dst = &e2e_pipe_param[pipe_idx].pipe.dest; - const display_output_params_st *dout = &e2e_pipe_param[pipe_idx].dout; const display_clocks_and_cfg_st *clks = &e2e_pipe_param[pipe_idx].clks_cfg; const scaler_ratio_depth_st *scl = &e2e_pipe_param[pipe_idx].pipe.scale_ratio_depth; const scaler_taps_st *taps = &e2e_pipe_param[pipe_idx].pipe.scale_taps; @@ -916,9 +915,6 @@ static void dml_rq_dlg_get_dlg_params( unsigned int vupdate_width; unsigned int vready_offset; - unsigned int dppclk_delay_subtotal; - unsigned int dispclk_delay_subtotal; - unsigned int vstartup_start; unsigned int dst_x_after_scaler; unsigned int dst_y_after_scaler; @@ -1037,21 +1033,6 @@ static void dml_rq_dlg_get_dlg_params( vupdate_width = dst->vupdate_width; vready_offset = dst->vready_offset; - dppclk_delay_subtotal = mode_lib->ip.dppclk_delay_subtotal; - dispclk_delay_subtotal = mode_lib->ip.dispclk_delay_subtotal; - - if (scl_enable) - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl; - else - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_scl_lb_only; - - dppclk_delay_subtotal += mode_lib->ip.dppclk_delay_cnvc_formatter + src->num_cursors * mode_lib->ip.dppclk_delay_cnvc_cursor; - - if (dout->dsc_enable) { - double dsc_delay = get_dsc_delay(mode_lib, e2e_pipe_param, num_pipes, pipe_idx); // FROM VBA - dispclk_delay_subtotal += dsc_delay; - } - vstartup_start = dst->vstartup_start; if (interlaced) { if (vstartup_start / 2.0 - (double) (vready_offset + vupdate_width + vupdate_offset) / htotal <= vblank_end / 2.0) From 54c3e9493cd502d63ff3643fa70b5f98b3201846 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Thu, 14 Jul 2022 13:45:03 -0300 Subject: [PATCH 046/134] drm/amd/display: Remove unused variables from dcn10_stream_encoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The variable regval from the function enc1_update_generic_info_packet and the variables dynamic_range_rgb and dynamic_range_ycbcr from the function enc1_stream_encoder_dp_set_stream_attribute are not currently used. This was pointed by clang with the following warnings: drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_stream_encoder.c:62:11: warning: variable 'regval' set but not used [-Wunused-but-set-variable] uint32_t regval; ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_stream_encoder.c:262:10: warning: variable 'dynamic_range_rgb' set but not used [-Wunused-but-set-variable] uint8_t dynamic_range_rgb = 0; /*full range*/ ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_stream_encoder.c:263:10: warning: variable 'dynamic_range_ycbcr' set but not used [-Wunused-but-set-variable] uint8_t dynamic_range_ycbcr = 1; /*bt709*/ ^ 3 warnings generated. 
Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn10/dcn10_stream_encoder.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index c99c6fababa95..484e7cdf00b8c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -59,7 +59,6 @@ void enc1_update_generic_info_packet( uint32_t packet_index, const struct dc_info_packet *info_packet) { - uint32_t regval; /* TODOFPGA Figure out a proper number for max_retries polling for lock * use 50 for now. */ @@ -88,7 +87,6 @@ void enc1_update_generic_info_packet( REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1); /* choose which generic packet to use */ - regval = REG_READ(AFMT_VBI_PACKET_CONTROL); REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, packet_index); @@ -259,8 +257,6 @@ void enc1_stream_encoder_dp_set_stream_attribute( uint32_t h_back_porch; uint8_t synchronous_clock = 0; /* asynchronous mode */ uint8_t colorimetry_bpc; - uint8_t dynamic_range_rgb = 0; /*full range*/ - uint8_t dynamic_range_ycbcr = 1; /*bt709*/ uint8_t dp_pixel_encoding = 0; uint8_t dp_component_depth = 0; @@ -372,18 +368,15 @@ void enc1_stream_encoder_dp_set_stream_attribute( switch (output_color_space) { case COLOR_SPACE_SRGB: misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_rgb = 0; /*full range*/ break; case COLOR_SPACE_SRGB_LIMITED: misc0 = misc0 | 0x8; /* bit3=1 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_rgb = 1; /*limited range*/ break; case COLOR_SPACE_YCBCR601: case COLOR_SPACE_YCBCR601_LIMITED: misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_ycbcr = 0; /*bt601*/ if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) @@ -393,15 +386,12 @@ void enc1_stream_encoder_dp_set_stream_attribute( case COLOR_SPACE_YCBCR709_LIMITED: misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */ misc1 = misc1 & ~0x80; /* bit7 = 0*/ - dynamic_range_ycbcr = 1; /*bt709*/ if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */ else if (hw_crtc_timing.pixel_encoding == PIXEL_ENCODING_YCBCR444) misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */ break; case COLOR_SPACE_2020_RGB_LIMITEDRANGE: - dynamic_range_rgb = 1; /*limited range*/ - break; case COLOR_SPACE_2020_RGB_FULLRANGE: case COLOR_SPACE_2020_YCBCR: case COLOR_SPACE_XR_RGB: From 869618c3440227f848ff9ac55aa64d523a60476e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Thu, 14 Jul 2022 13:45:04 -0300 Subject: [PATCH 047/134] drm/amd/display: Remove unused MaxUsedBW variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the variable MaxUsedBW from the function DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation. As a side-effect, the variables MaxPerPlaneVActiveWRBandwidth and WRBandwidth are also removed. This was pointed by clang with the following warning: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3043:10: warning: variable 'MaxUsedBW' set but not used [-Wunused-but-set-variable] double MaxUsedBW = 0; ^ 1 warning generated. 
Reviewed-by: André Almeida Signed-off-by: Maíra Canal Signed-off-by: Alex Deucher --- .../dc/dml/dcn30/display_mode_vba_30.c | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c index 842eb94ebe04b..876b321b30ca6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c @@ -3037,40 +3037,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { //Maximum Bandwidth Used - double TotalWRBandwidth = 0; - double MaxPerPlaneVActiveWRBandwidth = 0; - double WRBandwidth = 0; - double MaxUsedBW = 0; - for (k = 0; k < v->NumberOfActivePlanes; ++k) { - if (v->WritebackEnable[k] == true - && v->WritebackPixelFormat[k] == dm_444_32) { - WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] - / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; - } else if (v->WritebackEnable[k] == true) { - WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] - / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; - } - TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; - MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); - } - v->TotalDataReadBandwidth = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; } - - { - double MaxPerPlaneVActiveRDBandwidth = 0; - for (k = 0; k < v->NumberOfActivePlanes; ++k) { - MaxPerPlaneVActiveRDBandwidth = dml_max(MaxPerPlaneVActiveRDBandwidth, - v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]); - - } - } - - MaxUsedBW = MaxTotalRDBandwidth + TotalWRBandwidth; } // VStartup Margin From fbcc38811fcb47335899a3776b4fd5670db4e228 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Sat, 16 Jul 2022 18:51:44 -0100 Subject: [PATCH 048/134] drm/amd/display: move dcn31_update_soc_for_wm_a func to dml fpu folder Although dcn31_update_soc_for_wm_a() is only called in dml/dcn31/dcn31_fpu by dc->res_pool->funcs->update_soc_for_wm_a(dc, context), it's declared in dcn31_resource that is not FPU protected. Move this function to dcn31_fpu file as part of the work to isolate FPU code. 
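For context, code under the dml/ FPU folders is expected to run inside kernel FPU guards; a typical caller looks like this sketch (assuming the DC_FP_START()/DC_FP_END() helpers DC already uses around other dml/dcn31 code):

    DC_FP_START();                          /* allow FPU use in kernel context */
    dcn31_update_soc_for_wm_a(dc, context); /* writes double-typed soc fields */
    DC_FP_END();                            /* restore the FPU state */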
Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 9 --------- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h | 1 - drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 9 +++++++++ drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 2 ++ 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 5e924d0389ccf..178d40c0d70ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1716,15 +1716,6 @@ int dcn31_populate_dml_pipes_from_context( return pipe_cnt; } -void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) -{ - if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us; - } -} - void dcn31_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 393458015d6a4..41f8ec99da6b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -59,7 +59,6 @@ dcn31_set_mcif_arb_params(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt); -void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); struct resource_pool *dcn31_create_resource_pool( const struct dc_init_data *init_data, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index ba371769dc3e3..450ebd8385056 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -69,6 +69,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn314/dcn314_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index 7be3476989ce9..facac3daeaca6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -435,6 +435,15 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; +void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) +{ + if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_exit_time_us; + } +} + void dcn31_calculate_wm_and_dlg_fp( struct dc *dc, struct dc_state 
*context, display_e2e_pipe_params_st *pipes, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h index 24ac19c836876..0a10de80c1a42 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -31,6 +31,8 @@ #define DCN3_15_MIN_COMPBUF_SIZE_KB 128 #define DCN3_16_DEFAULT_DET_SIZE 192 +void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); + void dcn31_calculate_wm_and_dlg_fp( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, From 4686177f7d2140cdd9d031702c2b53ac4c89340a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 14 Jul 2022 16:17:44 -0300 Subject: [PATCH 049/134] drm/amd/debugfs: Expose GFXOFF state to userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GFXOFF has two different "state" values: one to define if the GPU is allowed/disallowed to enter GFXOFF, usually called state; and another one to define if currently GFXOFF is being used, usually called status. Even when GFXOFF is allowed, GPU firmware can decide not to use it according to the GPU load. Userspace can allow/disallow GPUs to enter into GFXOFF via debugfs. The kernel maintains a counter of requests for GFXOFF (gfx_off_req_count) that should be decreased to allow GFXOFF and increased to disallow. The issue with this interface is that userspace can't be sure if GFXOFF is currently allowed. Even by checking the amdgpu_gfxoff file, one might get an ambiguous 2, which means that the GPU is currently out of GFXOFF; that can be either because it's currently disallowed, or because it's allowed but, given the current GPU load, GFXOFF is not currently engaged. Userspace then needs to rely on the fact that GFXOFF is enabled by default on boot and track this information itself. To make userspace's life easier and GFXOFF more reliable, return the current state of GFXOFF to userspace when reading amdgpu_gfxoff with the same semantics as writing: 0 means not allowed, not 0 means allowed. Expose the current status of GFXOFF through a new file, amdgpu_gfxoff_status.
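For illustration only (not part of this patch), userspace could query both files along these lines; this is a minimal sketch that assumes debugfs is mounted, DRI instance 0 and sufficient privileges, and read_u32() is a hypothetical helper:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Both files return a native-endian u32 and expect 4-byte-aligned reads. */
    static int read_u32(const char *path, uint32_t *val)
    {
            ssize_t n = -1;
            int fd = open(path, O_RDONLY);

            if (fd >= 0) {
                    n = read(fd, val, sizeof(*val));
                    close(fd);
            }
            return n == sizeof(*val) ? 0 : -1;
    }

    int main(void)
    {
            uint32_t state, status;

            if (!read_u32("/sys/kernel/debug/dri/0/amdgpu_gfxoff", &state))
                    printf("GFXOFF allowed: %s\n", state ? "yes" : "no");
            if (!read_u32("/sys/kernel/debug/dri/0/amdgpu_gfxoff_status", &status))
                    printf("GFXOFF status: %u\n", status);
            return 0;
    }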
Signed-off-by: André Almeida Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 49 ++++++++++++++++++++- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f3b3c688e4e70..e2eec985adb3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1117,13 +1117,50 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, } while (size) { - uint32_t value; + u32 value = adev->gfx.gfx_off_state; + + r = put_user(value, (u32 *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u32 value; r = amdgpu_get_gfx_off_status(adev, &value); if (r) goto out; - r = put_user(value, (uint32_t *)buf); + r = put_user(value, (u32 *)buf); if (r) goto out; @@ -1206,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_status_read, + .llseek = default_llseek +}; + static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, @@ -1217,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_wave_fops, &amdgpu_debugfs_gpr_fops, &amdgpu_debugfs_gfxoff_fops, + &amdgpu_debugfs_gfxoff_status_fops, }; static const char *debugfs_regs_names[] = { @@ -1230,6 +1274,7 @@ static const char *debugfs_regs_names[] = { "amdgpu_wave", "amdgpu_gpr", "amdgpu_gfxoff", + "amdgpu_gfxoff_status", }; /** From 7a06e125872929247f78f363d1dc2dbd528631ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Thu, 14 Jul 2022 16:17:45 -0300 Subject: [PATCH 050/134] Documentation/gpu: Add GFXOFF section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a GFXOFF section at "GPU Power Controls" file, explaining what it is and how userspace can interact with it. v2: minor tweaks to the documenation (Alex) Signed-off-by: André Almeida Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/thermal.rst | 41 ++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst index 8aeb0186c9ef8..997231b6adcf9 100644 --- a/Documentation/gpu/amdgpu/thermal.rst +++ b/Documentation/gpu/amdgpu/thermal.rst @@ -63,3 +63,44 @@ gpu_metrics .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c :doc: gpu_metrics + +GFXOFF +====== + +GFXOFF is a feature found in most recent GPUs that saves power at runtime. The +card's RLC (RunList Controller) firmware powers off the gfx engine +dynamically when there is no workload on gfx or compute pipes. GFXOFF is on by +default on supported GPUs. 
+ +Userspace can interact with GFXOFF through a debugfs interface: + +``amdgpu_gfxoff`` +----------------- + +Use it to enable/disable GFXOFF, and to check if it's currently enabled/disabled:: + + $ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff + 01 + +- Write 0 to disable it, and 1 to enable it. +- Read 0 means it's disabled, 1 it's enabled. + +If it's enabled, that means that the GPU is free to enter into GFXOFF mode as needed. Disabled means that it will never enter GFXOFF mode. + +``amdgpu_gfxoff_status`` +------------------------ + +Read it to check the current GFXOFF status of a GPU:: + + $ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff_status + 02 + +- 0: GPU is in GFXOFF state, the gfx engine is powered down. +- 1: Transition out of GFXOFF state +- 2: Not in GFXOFF state +- 3: Transition into GFXOFF state + +If GFXOFF is enabled, the value will transition within [0, 3], always returning to 0 when possible. When it's disabled, it's always at 2. Returns ``-EINVAL`` if it's not supported. From 7b5b0d196c2e6cad87cb1e3ce285c7885f2bd796 Mon Sep 17 00:00:00 2001 From: Vladimir Stempen Date: Wed, 6 Jul 2022 15:57:12 -0400 Subject: [PATCH 051/134] drm/amd/display: Disable GPUVM in IP resource configuration [Why] VM enabled in the IP configuration causes UCLK not to reach DPM0. The expectation for VM enable should be that KMD will indicate to DAL when VM is enabled, then DAL will set the bit accordingly. [How] Set gpuvm_enable to zero in DCN3_20 and DCN3_21 resource. Tested-by: Daniel Wheeler Reviewed-by: Martin Leung Acked-by: Alan Liu Signed-off-by: Vladimir Stempen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 631876832dfa7..0cb44ea9753b6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -120,7 +120,7 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define DCN3_2_MIN_COMPBUF_SIZE_KB 128 struct _vcs_dpi_ip_params_st dcn3_2_ip = { - .gpuvm_enable = 1, + .gpuvm_enable = 0, .gpuvm_max_page_table_levels = 4, .hostvm_enable = 0, .rob_buffer_size_kbytes = 128, diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index ebbeebf972dc0..d218c6dd71aa7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -123,7 +123,7 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define DCN3_2_DEFAULT_DET_SIZE 256 struct _vcs_dpi_ip_params_st dcn3_21_ip = { - .gpuvm_enable = 1, + .gpuvm_enable = 0, .gpuvm_max_page_table_levels = 4, .hostvm_enable = 0, .rob_buffer_size_kbytes = 128, From 660f46e16c4b8a34978012a9f10a32a16db3e98f Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 8 Jul 2022 13:32:46 -0400 Subject: [PATCH 052/134] drm/amd/display: Loop through all pipes for DET allocation [Why & How] There are cases where the pipes populated are not all at the top of the pipes list under context. Loop through all pipes for DET allocation instead of just the number of populated ones, even if some unpopulated pipes are iterated through unnecessarily.
Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Alan Liu Signed-off-by: Taimur Hassan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 0cb44ea9753b6..32da47e248397 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -3068,7 +3068,7 @@ int dcn32_populate_dml_pipes_from_context( } } } else - dcn32_determine_det_override(context, pipes, is_pipe_split_expected, pipe_cnt); + dcn32_determine_det_override(context, pipes, is_pipe_split_expected, dc->res_pool->pipe_count); // In general cases we want to keep the dram clock change requirement // (prefer configs that support MCLK switch). Only override to false From f3cd57e499e6904b7e356d11bd33d617341b3f24 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Tue, 19 Jul 2022 11:55:07 -0400 Subject: [PATCH 053/134] drm/amd/display: Disable dmcu fw loading for dcn314 [Why] DCN 3.1.4 uses dmub not dmcu. Attempt to identify dmcu firmware for dcn314 results in dm init error: "Unsupported ASIC type" [How] Add dcn314 to the list of asics that don't require dmcu Signed-off-by: Roman Li Reviewed-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4e51b06fcdd9b..a9f9c929dca6b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1805,6 +1805,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): case IP_VERSION(3, 2, 0): From 869b10ac8d2300327f554d83f4dbab041bf27d49 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 13 Jun 2022 12:21:59 -0400 Subject: [PATCH 054/134] drm/amdgpu: add dm ip block for dcn 3.1.4 Adding dm ip block to enable display on dcn 3.1.4. Signed-off-by: Roman Li Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 0ba56e2ebf09b..242d1847c4aa5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1716,6 +1716,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): case IP_VERSION(3, 2, 0): From 4d37fd51b4440cf6a02942c0a169ee18a902fb5b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 16 Jul 2022 11:57:32 +0800 Subject: [PATCH 055/134] drm/radeon: Fix comment typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The double `have' is duplicated in line 696, remove one. 
Reviewed-by: Christian König Signed-off-by: Jason Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 84843b3b3aef4..261fcbae88d78 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -693,7 +693,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, } /* !! DONT REMOVE !! - * We don't support vm_id yet, to be sure we don't have have broken + * We don't support vm_id yet, to be sure we don't have broken * userspace, reject anyone trying to use non 0 value thus moving * forward we can use those fields without breaking existant userspace */ From c19a23fadd279f433424b4d6436fe4ab0020e20c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 16 Jul 2022 12:28:41 +0800 Subject: [PATCH 056/134] drm/amdgpu: Fix comment typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The double `to' is duplicated in the comment, remove one. Reviewed-by: Christian König Signed-off-by: Jason Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 4c66aff11a400..52d1fd7d8e811 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -80,7 +80,7 @@ * - 3.24.0 - Add high priority compute support for gfx9 * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE. - * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. + * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation. * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE. From 37edc99979b717312e60cf3463ab756f5a3d6de6 Mon Sep 17 00:00:00 2001 From: Chris Park Date: Fri, 8 Jul 2022 15:36:18 -0400 Subject: [PATCH 057/134] drm/amd/display: Update Cursor Attribute MALL cache [Why] Cursor size can update without MALL cache update. Update the register on cursor attribute as well. [How] Update cursor MALL cache on cursor attribute update. 
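For reference, the helper added below sizes the cursor as width * height * 4 bytes (the check assumes 32 bits per pixel) against a 16 KiB threshold, e.g.:

    64  * 64  * 4 = 16384 bytes -> not above 16384 -> USE_MALL_FOR_CURSOR = false
    128 * 128 * 4 = 65536 bytes -> above 16384     -> USE_MALL_FOR_CURSOR = true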
Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Alan Liu Signed-off-by: Chris Park Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c | 40 ++++++++++++++++++- .../gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h | 3 ++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c index 0a7d64306481b..3176b04a77400 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c @@ -94,6 +94,44 @@ void hubp32_phantom_hubp_post_enable(struct hubp *hubp) } } +void hubp32_cursor_set_attributes( + struct hubp *hubp, + const struct dc_cursor_attributes *attr) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + enum cursor_pitch hw_pitch = hubp1_get_cursor_pitch(attr->pitch); + enum cursor_lines_per_chunk lpc = hubp2_get_lines_per_chunk( + attr->width, attr->color_format); + + hubp->curs_attr = *attr; + + REG_UPDATE(CURSOR_SURFACE_ADDRESS_HIGH, + CURSOR_SURFACE_ADDRESS_HIGH, attr->address.high_part); + REG_UPDATE(CURSOR_SURFACE_ADDRESS, + CURSOR_SURFACE_ADDRESS, attr->address.low_part); + + REG_UPDATE_2(CURSOR_SIZE, + CURSOR_WIDTH, attr->width, + CURSOR_HEIGHT, attr->height); + + REG_UPDATE_4(CURSOR_CONTROL, + CURSOR_MODE, attr->color_format, + CURSOR_2X_MAGNIFY, attr->attribute_flags.bits.ENABLE_MAGNIFICATION, + CURSOR_PITCH, hw_pitch, + CURSOR_LINES_PER_CHUNK, lpc); + + REG_SET_2(CURSOR_SETTINGS, 0, + /* no shift of the cursor HDL schedule */ + CURSOR0_DST_Y_OFFSET, 0, + /* used to shift the cursor chunk request deadline */ + CURSOR0_CHUNK_HDL_ADJUST, 3); + + if (attr->width * attr->height * 4 > 16384) + REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, true); + else + REG_UPDATE(DCHUBP_MALL_CONFIG, USE_MALL_FOR_CURSOR, false); +} + static struct hubp_funcs dcn32_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, @@ -106,7 +144,7 @@ static struct hubp_funcs dcn32_hubp_funcs = { .set_blank = hubp2_set_blank, .dcc_control = hubp3_dcc_control, .mem_program_viewport = min_set_viewport, - .set_cursor_attributes = hubp2_cursor_set_attributes, + .set_cursor_attributes = hubp32_cursor_set_attributes, .set_cursor_position = hubp2_cursor_set_position, .hubp_clk_cntl = hubp2_clk_cntl, .hubp_vtg_sel = hubp2_vtg_sel, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h index 00b4211389c27..c4315d50fbb06 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h @@ -58,6 +58,9 @@ void hubp32_prepare_subvp_buffering(struct hubp *hubp, bool enable); void hubp32_phantom_hubp_post_enable(struct hubp *hubp); +void hubp32_cursor_set_attributes(struct hubp *hubp, + const struct dc_cursor_attributes *attr); + bool hubp32_construct( struct dcn20_hubp *hubp2, struct dc_context *ctx, From f4b4e41a2e05270cd90c5817ab514ace95555874 Mon Sep 17 00:00:00 2001 From: Jun Lei Date: Fri, 24 Jun 2022 16:28:50 -0400 Subject: [PATCH 058/134] drm/amd/display: Update DML logic for unbounded req handling [why] Unbounded request logic in resource/DML has some issues where unbounded request is being enabled incorrectly. SW today enables unbounded request unconditionally in hardware, on the assumption that HW can always support it in single pipe scenarios. This worked until now because the same assumption is made in DML. 
A new DML update is needed to fix a bug, where there are single pipe scenarios where unbounded cannot be enabled, and this change in DML needs to be ported in, and dcn32 resource logic fixed. [how] First, dcn32_resource should program unbounded req in HW according to unbounded req enablement output from DML, as opposed to DML input. Second, port in DML update which disables unbounded req in some scenarios to fix an issue with poor stutter performance Tested-by: Daniel Wheeler Reviewed-by: Rodrigo Siqueira Signed-off-by: Jun Lei Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 11 +++- .../dc/dml/dcn32/display_mode_vba_32.c | 44 +++++++++++++--- .../dc/dml/dcn32/display_mode_vba_util_32.c | 51 ++++++++++++++++--- .../dc/dml/dcn32/display_mode_vba_util_32.h | 10 +++- .../drm/amd/display/dc/dml/display_mode_vba.c | 1 + 5 files changed, 103 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 32da47e248397..39214a0dcdf2b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -3322,6 +3322,7 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display { int i, pipe_idx; bool usr_retraining_support = false; + bool unbounded_req_enabled = false; /* Writeback MCIF_WB arbitration parameters */ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); @@ -3357,6 +3358,14 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; + unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (unbounded_req_enabled && pipe_cnt > 1) { + // Unbounded requesting should not ever be used when more than 1 pipe is enabled. 
+ ASSERT(false); + unbounded_req_enabled = false; + } + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; @@ -3375,7 +3384,7 @@ void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display } else { context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode; + context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled; } if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 1efce9f5eae3b..e9204c711cb96 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -224,6 +224,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.nomDETInKByte, mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, v->dummy_vars .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation @@ -285,6 +288,10 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman mode_lib->vba.DETBufferSizeC, &v->UnboundedRequestEnabled, &v->CompressedBufferSizeInkByte, + &v->CompBufReservedSpaceKBytes, + &v->dummy_vars + .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation + .dummy_boolean, /* bool *CompBufReservedSpaceNeedAjustment */ v->dummy_vars .DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation .dummy_boolean_array, /* bool ViewportSizeSupportPerSurface[] */ @@ -293,6 +300,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman .dummy_boolean); /* bool *ViewportSizeSupport */ } + v->CompBufReservedSpaceZs = v->CompBufReservedSpaceKBytes * 1024.0 / 256.0; + v->CompBufReservedSpace64B = v->CompBufReservedSpaceKBytes * 1024.0 / 64.0; + // DCFCLK Deep Sleep dml32_CalculateDCFCLKDeepSleep( mode_lib->vba.NumberOfActiveSurfaces, @@ -1530,8 +1540,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->TotalDataReadBandwidth, mode_lib->vba.DCFCLK, mode_lib->vba.ReturnBW, - mode_lib->vba.CompbufReservedSpace64B, - mode_lib->vba.CompbufReservedSpaceZs, + v->CompbufReservedSpace64B, + v->CompbufReservedSpaceZs, mode_lib->vba.SRExitTime, mode_lib->vba.SRExitZ8Time, mode_lib->vba.SynchronizeTimingsFinal, @@ -1596,8 +1606,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->TotalDataReadBandwidth, mode_lib->vba.DCFCLK, mode_lib->vba.ReturnBW, - 0, //mode_lib->vba.CompbufReservedSpace64B, - 0, //mode_lib->vba.CompbufReservedSpaceZs, + 0, //CompbufReservedSpace64B, + 0, //CompbufReservedSpaceZs, mode_lib->vba.SRExitTime, mode_lib->vba.SRExitZ8Time, mode_lib->vba.SynchronizeTimingsFinal, @@ -1659,6 +1669,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib 
*mode_lib) { unsigned int dummy_integer[4]; + bool dummy_boolean[2]; bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; bool MPCCombineMethodAsPossible; enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; @@ -1673,6 +1684,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l bool SubViewportMALLPStateMethod; bool PhantomPipeMALLPStateMethod; unsigned int MaximumMPCCombine; + bool CompBufReservedSpaceNeedAdjustment; + bool CompBufReservedSpaceNeedAdjustmentSingleDPP; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: called\n", __func__); @@ -1905,6 +1918,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.nomDETInKByte, mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, mode_lib->vba.Output, mode_lib->vba.ReadBandwidthLuma, @@ -1952,6 +1968,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[7], /* Long DETBufferSizeC[] */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0][0], /* bool *UnboundedRequestEnabled */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[0][0], /* Long *CompressedBufferSizeInkByte */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer_array[1][0], /* Long *CompBufReservedSpaceKBytes */ + &CompBufReservedSpaceNeedAdjustmentSingleDPP, mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool *ViewportSizeSupport */ @@ -2120,9 +2138,18 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } + // if TotalNumberOfActiveDPP is > 1, then there should be no unbounded req mode (hw limitation), the comp buf reserved adjustment is not needed regardless + // if TotalNumberOfActiveDPP is == 1, then will use the SingleDPP version of unbounded_req for the decision + CompBufReservedSpaceNeedAdjustment = (mode_lib->vba.TotalNumberOfActiveDPP[i][j] > 1) ? 
0 : CompBufReservedSpaceNeedAdjustmentSingleDPP; + + + if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting, - mode_lib->vba.TotalNumberOfActiveDPP[i][j], NoChroma, - mode_lib->vba.Output[0])) { + mode_lib->vba.TotalNumberOfActiveDPP[i][j], NoChroma, + mode_lib->vba.Output[0], + mode_lib->vba.SurfaceTiling[0], + CompBufReservedSpaceNeedAdjustment, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) { while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP || mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) { double BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; @@ -2500,6 +2527,9 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.nomDETInKByte, mode_lib->vba.UseUnboundedRequesting, + mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + mode_lib->vba.ip.pixel_chunk_size_kbytes, + mode_lib->vba.ip.rob_buffer_size_kbytes, mode_lib->vba.CompressedBufferSegmentSizeInkByteFinal, mode_lib->vba.Output, mode_lib->vba.ReadBandwidthLuma, @@ -2546,6 +2576,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DETBufferSizeCThisState, &mode_lib->vba.UnboundedRequestEnabledThisState, &mode_lib->vba.CompressedBufferSizeInkByteThisState, + &dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ + &dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], &mode_lib->vba.ViewportSizeSupport[i][j]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 67cbc7923652b..c8a3f367d6229 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -400,6 +400,9 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int NumberOfActiveSurfaces, unsigned int nomDETInKByte, enum unbounded_requesting_policy UseUnboundedRequestingFinal, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + unsigned int PixelChunkSizeKBytes, + unsigned int ROBSizeKBytes, unsigned int CompressedBufferSegmentSizeInkByteFinal, enum output_encoder_class Output[], double ReadBandwidthLuma[], @@ -447,6 +450,8 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int DETBufferSizeC[], bool *UnboundedRequestEnabled, unsigned int *CompressedBufferSizeInkByte, + unsigned int *CompBufReservedSpaceKBytes, + bool *CompBufReservedSpaceNeedAdjustment, bool ViewportSizeSupportPerSurface[], bool *ViewportSizeSupport) { @@ -465,6 +470,8 @@ void dml32_CalculateSwathAndDETConfiguration( #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); + dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); + dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); #endif dml32_CalculateSwathWidth(ForceSingleDPP, NumberOfActiveSurfaces, @@ -534,8 +541,24 @@ void dml32_CalculateSwathAndDETConfiguration( } } - *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, - NoChromaSurfaces, Output[0]); + // By default, just set the reserved space to 2 pixel chunks size + *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; + + // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 
swaths worth of data + // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] + // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req + *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); + + if (*CompBufReservedSpaceNeedAdjustment == 1) { + *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; + } + + #ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); + dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); + #endif + + *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); dml32_CalculateDETBufferSize(DETSizeOverride, UseMALLForPStateChange, @@ -853,9 +876,12 @@ void dml32_CalculateSwathWidth( } // CalculateSwathWidth bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, - unsigned int TotalNumberOfActiveDPP, - bool NoChroma, - enum output_encoder_class Output) + unsigned int TotalNumberOfActiveDPP, + bool NoChroma, + enum output_encoder_class Output, + enum dm_swizzle_mode SurfaceTiling, + bool CompBufReservedSpaceNeedAdjustment, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) { bool ret_val = false; @@ -863,7 +889,20 @@ bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequest TotalNumberOfActiveDPP == 1 && NoChroma); if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) ret_val = false; - return ret_val; + + if (SurfaceTiling == dm_sw_linear) + ret_val = false; + + if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) + ret_val = false; + +#ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); + dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); + dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); +#endif + + return (ret_val); } void dml32_CalculateDETBufferSize( diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 72461b934ee06..d293856ba906b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -90,6 +90,9 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int NumberOfActiveSurfaces, unsigned int nomDETInKByte, enum unbounded_requesting_policy UseUnboundedRequestingFinal, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, + unsigned int PixelChunkSizeKBytes, + unsigned int ROBSizeKBytes, unsigned int CompressedBufferSegmentSizeInkByteFinal, enum output_encoder_class Output[], double ReadBandwidthLuma[], @@ -137,6 +140,8 @@ void dml32_CalculateSwathAndDETConfiguration( unsigned int DETBufferSizeC[], bool *UnboundedRequestEnabled, unsigned int *CompressedBufferSizeInkByte, + unsigned int *CompBufReservedSpaceKBytes, + bool *CompBufReservedSpaceNeedAdjustment, bool 
ViewportSizeSupportPerSurface[], bool *ViewportSizeSupport); @@ -181,7 +186,10 @@ void dml32_CalculateSwathWidth( bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, unsigned int TotalNumberOfActiveDPP, bool NoChroma, - enum output_encoder_class Output); + enum output_encoder_class Output, + enum dm_swizzle_mode SurfaceTiling, + bool CompBufReservedSpaceNeedAdjustment, + bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); void dml32_CalculateDETBufferSize( unsigned int DETSizeOverride[], diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index de78bb8489cb3..503e7d984ff03 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -111,6 +111,7 @@ dml_get_attr_func(tcalc, mode_lib->vba.TCalc); dml_get_attr_func(fraction_of_urgent_bandwidth, mode_lib->vba.FractionOfUrgentBandwidth); dml_get_attr_func(fraction_of_urgent_bandwidth_imm_flip, mode_lib->vba.FractionOfUrgentBandwidthImmediateFlip); + dml_get_attr_func(cstate_max_cap_mode, mode_lib->vba.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); dml_get_attr_func(comp_buffer_size_kbytes, mode_lib->vba.CompressedBufferSizeInkByte); dml_get_attr_func(pixel_chunk_size_in_kbyte, mode_lib->vba.PixelChunkSizeInKByte); From 26749aa8d1261bd6f2db9d019276d4277dde7df8 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 7 Jul 2022 17:17:25 -0400 Subject: [PATCH 059/134] drm/amd/display: Revert "drm/amd/display: disable idle optimizations" This reverts commit e7ef5569e71bf3fec01ea513c27c6081c0dbbc64. Idle optimization was disabled due to SMU and firmware bugs. Enable it back for DCN32; DCN321 already has it enabled. Fixes: 6a640b95b061 ("drm/amd/display: disable idle optimizations") Signed-off-by: Aurabindo Pillai Reviewed-and-tested-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 39214a0dcdf2b..1b499f42f459d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -972,7 +972,6 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, - .disable_idle_power_optimizations = true, .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, From 041a11095abdd52b38f1ea1355357ecd2b66c0e2 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jul 2022 10:11:08 -0400 Subject: [PATCH 060/134] drm/amd/display: Drop FPU flags from dcn32_clk_mgr We are working to isolate FPU operations inside the DML folder, and the file dcn32_clk_mgr has some of these operations. This commit moves the FPU operations out of the clock manager and creates the dcn32_fpu file to aggregate those operations. Note that there is no functional change here; we are just moving code from one place to another.
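For reference, the calling convention this series converges on looks like the sketch below (taken from the dcn32_clk_mgr.c hunk in this patch): the code that operates on doubles lives in dcn32_fpu.c and asserts that it runs inside a protected FPU context, while the caller brackets the call with DC_FP_START()/DC_FP_END().

static void dcn32_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
{
	DC_FP_START();				/* enter the protected FPU context */
	dcn32_build_wm_range_table_fpu(clk_mgr); /* double math; calls dc_assert_fp_enabled() */
	DC_FP_END();				/* leave the protected FPU context */
}
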
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 25 ---- .../display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c | 81 +------------ drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 113 ++++++++++++++++++ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 34 ++++++ 5 files changed, 153 insertions(+), 102 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 053084121db2e..a48453612d10c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -188,31 +188,6 @@ CLK_MGR_DCN32 = dcn32_clk_mgr.o dcn32_clk_mgr_smu_msg.o AMD_DAL_CLK_MGR_DCN32 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn32/,$(CLK_MGR_DCN32)) -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -msse2 -endif -endif - AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32) endif diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c index 5b87f937554d3..c6785969eb1aa 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c @@ -42,6 +42,7 @@ #include "dcn/dcn_3_2_0_sh_mask.h" #include "dcn32/dcn32_clk_mgr.h" +#include "dml/dcn32/dcn32_fpu.h" #define DCN_BASE__INST0_SEG1 0x000000C0 @@ -146,83 +147,9 @@ static void dcn32_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e cl static void dcn32_build_wm_range_table(struct clk_mgr_internal *clk_mgr) { - /* defaults */ - double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; - double fclk_change_latency_us = clk_mgr->base.ctx->dc->dml.soc.fclk_change_latency_us; - double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us; - double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us; - /* For min clocks use as reported by PM FW and report those as min */ - uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz; - uint16_t min_dcfclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; - uint16_t setb_min_uclk_mhz = min_uclk_mhz; - uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz; - - /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */ - if (dcfclk_mhz_for_the_second_state) - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state; - else - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = 
clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; - - if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz) - setb_min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz; - - /* Set A - Normal - default values */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ - /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ - if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; - clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; - 
clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; - clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; - clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; - clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[3].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; - } - /* Set D - MALL - SR enter and exit time specific to MALL, TBD after bringup or later phase for now use DRAM values / 2 */ - /* For MALL DRAM clock change latency is N/A, for watermak calculations use lowest value dummy P state latency */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us; // TBD - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; // TBD - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; + DC_FP_START(); + dcn32_build_wm_range_table_fpu(clk_mgr); + DC_FP_END(); } void dcn32_init_clocks(struct clk_mgr *clk_mgr_base) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index c48688cdd7f7d..01cb0ef3a2b02 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -72,6 +72,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags) @@ -124,6 +125,7 @@ DML += dcn30/dcn30_fpu.o dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_3 DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o DML += dcn32/display_mode_vba_32.o dcn32/display_rq_dlg_calc_32.o dcn32/display_mode_vba_util_32.o DML += dcn31/dcn31_fpu.o +DML += dcn32/dcn32_fpu.o DML += dcn301/dcn301_fpu.o DML += dcn302/dcn302_fpu.o DML += dcn303/dcn303_fpu.o diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c new file mode 100644 index 
0000000000000..89b596599c3d3 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ +#include "dcn32_fpu.h" + +// We need this includes for WATERMARKS_* defines +#include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" + +void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) +{ + /* defaults */ + double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; + double fclk_change_latency_us = clk_mgr->base.ctx->dc->dml.soc.fclk_change_latency_us; + double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us; + double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us; + /* For min clocks use as reported by PM FW and report those as min */ + uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz; + uint16_t min_dcfclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; + uint16_t setb_min_uclk_mhz = min_uclk_mhz; + uint16_t dcfclk_mhz_for_the_second_state = clk_mgr->base.ctx->dc->dml.soc.clock_limits[2].dcfclk_mhz; + + dc_assert_fp_enabled(); + + /* For Set B ranges use min clocks state 2 when available, and report those to PM FW */ + if (dcfclk_mhz_for_the_second_state) + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = dcfclk_mhz_for_the_second_state; + else + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = clk_mgr->base.bw_params->clk_table.entries[0].dcfclk_mhz; + + if (clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz) + setb_min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz; + + /* Set A - Normal - default values */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set B - Performance - higher clocks, using DPM[2] DCFCLK and UCLK */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = setb_min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ + /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ + if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; + clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; + clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; + clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; + clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[3].memclk_mhz * 16; + clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; + } + /* Set D - MALL - SR enter and exit time specific to MALL, TBD after bringup or later phase for now use DRAM values / 2 */ + /* For MALL DRAM clock change 
latency is N/A, for watermak calculations use lowest value dummy P state latency */ + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us = fclk_change_latency_us; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = sr_exit_time_us / 2; // TBD + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us / 2; // TBD + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = min_dcfclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h new file mode 100644 index 0000000000000..72a6dd75af0e0 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN32_FPU_H__ +#define __DCN32_FPU_H__ + +#include "clk_mgr_internal.h" + +void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr); + +#endif From af14e7c2fc9b60af70b410a7dace116eaa5e4e65 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 May 2022 18:09:11 -0400 Subject: [PATCH 061/134] drm/amdgpu: add the IP discovery IP versions for HW INFO data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the former pad element to store the IP versions from the IP discovery table. This allows userspace to get the IP version from the kernel to better align with hardware IP versions. 
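As a rough sketch of what a consumer of the new field might do (the helper name is illustrative and not part of the uapi; the bit layout is the one documented in the header change below):

#include <stdio.h>

static void print_ip_discovery_version(unsigned int v)
{
	/* bits 23:16 major, 15:8 minor, 7:0 revision */
	printf("IP version %u.%u.%u\n",
	       (v >> 16) & 0xff, (v >> 8) & 0xff, v & 0xff);
}
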
Proposed mesa patch: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17411/diffs?commit_id=c8a63590dfd0d64e6e6a634dcfed993f135dd075 Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 24 ++++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 3 ++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index b8ba59c93fc0c..1369c25448dc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -461,6 +461,30 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, result->hw_ip_version_major = adev->ip_blocks[i].version->major; result->hw_ip_version_minor = adev->ip_blocks[i].version->minor; + + if (adev->asic_type >= CHIP_VEGA10) { + switch (type) { + case AMD_IP_BLOCK_TYPE_GFX: + result->ip_discovery_version = adev->ip_versions[GC_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_SDMA: + result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_UVD: + case AMD_IP_BLOCK_TYPE_VCN: + case AMD_IP_BLOCK_TYPE_JPEG: + result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0]; + break; + case AMD_IP_BLOCK_TYPE_VCE: + result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0]; + break; + default: + result->ip_discovery_version = 0; + break; + } + } else { + result->ip_discovery_version = 0; + } result->capabilities_flags = 0; result->available_rings = (1 << num_rings) - 1; result->ib_start_alignment = ib_start_alignment; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 63de71f531101..c2c9c674a2232 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1097,7 +1097,8 @@ struct drm_amdgpu_info_hw_ip { __u32 ib_size_alignment; /** Bitmask of available rings. Bit 0 means ring 0, etc. */ __u32 available_rings; - __u32 _pad; + /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ + __u32 ip_discovery_version; }; struct drm_amdgpu_info_num_handles { From 465576ca481caa9cf18550652efdc6b103701c68 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 20 May 2022 10:59:35 -0400 Subject: [PATCH 062/134] drm/amdgpu: bump driver version for IP discovery info in HW INFO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So userspace knows when it is available. 
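A minimal userspace sketch, assuming libdrm's drmGetVersion() interface: older kernels report the field as zero (it used to be a pad), so checking the bumped KMS minor is the reliable gate.

#include <stdbool.h>
#include <xf86drm.h>

static bool kernel_has_ip_discovery_info(int fd)
{
	drmVersionPtr ver = drmGetVersion(fd);
	bool ok;

	if (!ver)
		return false;
	ok = ver->version_major > 3 ||
	     (ver->version_major == 3 && ver->version_minor >= 48);
	drmFreeVersion(ver);
	return ok;
}
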
Proposed mesa patch: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17411/diffs?commit_id=c8a63590dfd0d64e6e6a634dcfed993f135dd075 Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 52d1fd7d8e811..30f8c46f16b43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -100,10 +100,11 @@ * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B * - 3.45.0 - Add context ioctl stable pstate interface * - 3.46.0 - To enable hot plug amdgpu tests in libdrm - * * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags + * - 3.48.0 - Add IP discovery version info to HW INFO */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 47 +#define KMS_DRIVER_MINOR 48 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; From f7bacd97af853a9bba9bb8d1baa12f997e60122f Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jul 2022 16:03:40 -0400 Subject: [PATCH 063/134] drm/amd/display: Move populate phantom function to dml The function dcn32_helper_populate_phantom_dlg_params uses FPU operations. For this reason, this commit moves this function to the dcn32_fpu file, and we ensure that we only invoke it under the kernel_fpu protection. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 7 +++ .../display/dc/dcn32/dcn32_resource_helpers.c | 44 ------------------- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 43 ++++++++++++++++++ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 5 +++ 4 files changed, 55 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 1b499f42f459d..efbae88c492a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -88,6 +88,7 @@ #include "dml/dcn30/display_mode_vba_30.h" #include "vm_helper.h" #include "dcn20/dcn20_vmid.h" +#include "dml/dcn32/dcn32_fpu.h" #define DCN_BASE__INST0_SEG1 0x000000C0 #define DCN_BASE__INST0_SEG2 0x000034C0 @@ -312,6 +313,7 @@ enum dcn32_clk_src_array_id { .reg_name = NBIO_BASE(regBIF_BX0_ ## reg_name ## _BASE_IDX) + \ regBIF_BX0_ ## reg_name +#undef CTX #define CTX ctx #define REG(reg_name) \ (DCN_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) @@ -2666,6 +2668,11 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, memset(merge, 0, MAX_PIPES * sizeof(bool)); *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); + // Most populate phantom DLG params before programming hardware / timing for phantom pipe + DC_FP_START(); + dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt); + DC_FP_END(); + // Note: We can't apply the phantom pipes to hardware at this time. We have to wait // until driver has acquired the DMCUB lock to do it safely.
} diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index a6ef1dba01fe4..633d3ee18cfa3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -28,50 +28,6 @@ #include "dcn20/dcn20_resource.h" #include "dml/dcn32/display_mode_vba_util_32.h" -/** - * ******************************************************************************************** - * dcn32_helper_populate_phantom_dlg_params: Get DLG params for phantom pipes and populate pipe_ctx - * with those params. - * - * This function must be called AFTER the phantom pipes are added to context and run through DML - * (so that the DLG params for the phantom pipes can be populated), and BEFORE we program the - * timing for the phantom pipes. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] pipes: DML pipe params array - * @param [in] pipe_cnt: DML pipe count - * - * @return: void - * - * ******************************************************************************************** - */ -void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt) -{ - uint32_t i, pipe_idx; - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - if (!pipe->stream) - continue; - - if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipe->pipe_dlg_param = pipes[pipe_idx].pipe.dest; - } - pipe_idx++; - } -} - /** * ******************************************************************************************** * dcn32_helper_calculate_num_ways_for_subvp: Calculate number of ways needed for SubVP diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 89b596599c3d3..253ff9659b0d1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -111,3 +111,46 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; } +/** + * dcn32_helper_populate_phantom_dlg_params - Get DLG params for phantom pipes + * and populate pipe_ctx with those params. + * + * This function must be called AFTER the phantom pipes are added to context + * and run through DML (so that the DLG params for the phantom pipes can be + * populated), and BEFORE we program the timing for the phantom pipes. 
+ * + * @dc: [in] current dc state + * @context: [in] new dc state + * @pipes: [in] DML pipe params array + * @pipe_cnt: [in] DML pipe count + */ +void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + uint32_t i, pipe_idx; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (pipe->plane_state && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { + pipes[pipe_idx].pipe.dest.vstartup_start = + get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = + get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = + get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = + get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipe->pipe_dlg_param = pipes[pipe_idx].pipe.dest; + } + pipe_idx++; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 72a6dd75af0e0..492f99b6d5619 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -31,4 +31,9 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr); +void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt); + #endif From 792a0cdde34d417bc2c8266d8015c5fd58d44a0d Mon Sep 17 00:00:00 2001 From: Leo Li Date: Wed, 6 Jul 2022 14:48:52 -0400 Subject: [PATCH 064/134] drm/amd/display: Add visualconfirm module parameter [Why] Being able to configure visual confirm at boot or in cmdline is helpful when debugging. [How] Add a module parameter to configure DC visual confirm, which works the same way as the equivalent debugfs entry. 
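For example, booting with amdgpu.visualconfirm=1 on the kernel command line (or loading with modprobe amdgpu visualconfirm=1) selects the MPO visual confirm overlay and 5 selects PSR, matching the values listed in the MODULE_PARM_DESC below; 0 remains the default (off).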
Signed-off-by: Leo Li Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3aa8ae1bc35af..b075845a53287 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -197,6 +197,7 @@ extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; extern uint amdgpu_dc_debug_mask; +extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; extern int amdgpu_backlight; extern struct amdgpu_mgpu_info mgpu_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 30f8c46f16b43..429fcdf28836e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -168,6 +168,7 @@ int amdgpu_smu_pptable_id = -1; */ uint amdgpu_dc_feature_mask = 2; uint amdgpu_dc_debug_mask; +uint amdgpu_dc_visual_confirm; int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp; int amdgpu_discovery = -1; @@ -828,6 +829,9 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))"); module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444); +MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)"); +module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); + /** * DOC: abmlevel (uint) * Override the default ABM (Adaptive Backlight Management) level used for DC diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a9f9c929dca6b..22a3f89727052 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1538,6 +1538,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_dc_debug_mask & DC_FORCE_SUBVP_MCLK_SWITCH) adev->dm.dc->debug.force_subvp_mclk_switch = true; + adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm; + r = dm_dmub_hw_init(adev); if (r) { DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r); From 8813381a62e1f1703f8fbeccc5fa4fcc988be882 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Wed, 6 Jul 2022 14:56:28 -0400 Subject: [PATCH 065/134] drm/amd/display: Add dcdebugmask option for disabling MPO [Why & How] It's useful to disable MPO when debugging or testing. Therefore, add a dcdebugmask option to disable MPO. 
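For example, booting with amdgpu.dcdebugmask=0x40 (the new DC_DISABLE_MPO bit added below) makes the driver skip creating the DRM overlay planes, and the bit can be OR'd together with the existing DC_* debug flags in the mask.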
Signed-off-by: Leo Li Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ drivers/gpu/drm/amd/include/amd_shared.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 22a3f89727052..8660d93cc4055 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4197,6 +4197,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) for (i = 0; i < dm->dc->caps.max_planes; ++i) { struct dc_plane_cap *plane = &dm->dc->caps.planes[i]; + /* Do not create overlay if MPO disabled */ + if (amdgpu_dc_debug_mask & DC_DISABLE_MPO) + break; + if (plane->type != DC_PLANE_TYPE_DCN_UNIVERSAL) continue; diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 1db21d13726dd..f175e65b853a0 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -249,6 +249,7 @@ enum DC_DEBUG_MASK { DC_DISABLE_CLOCK_GATING = 0x8, DC_DISABLE_PSR = 0x10, DC_FORCE_SUBVP_MCLK_SWITCH = 0x20, + DC_DISABLE_MPO = 0x40, }; enum amd_dpm_forced_level; From ccc4200cfb2518fea042b16f090962b07314439b Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Tue, 21 Jun 2022 14:06:56 +0800 Subject: [PATCH 066/134] drm/pm/swsmu: add ras eeprom i2c function for smu13 v13_0_0 Add ras eeprom i2c function for smu13 v13_0_0. Signed-off-by: YiPeng Chai Acked-by: Evan Quan Acked-by: Alex Deucher Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 157 ++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index ce2fa04e3926a..2b83191e80064 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -196,6 +196,7 @@ static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { TAB_MAP(DRIVER_SMU_CONFIG), TAB_MAP(ACTIVITY_MONITOR_COEFF), [SMU_TABLE_COMBO_PPTABLE] = {1, TABLE_COMBO_PPTABLE}, + TAB_MAP(I2C_COMMANDS), }; static struct cmn2asic_mapping smu_v13_0_0_pwr_src_map[SMU_POWER_SOURCE_COUNT] = { @@ -1606,9 +1607,165 @@ static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu) return true; } +static int smu_v13_0_0_i2c_xfer(struct i2c_adapter *i2c_adap, + struct i2c_msg *msg, int num_msgs) +{ + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c_adap); + struct amdgpu_device *adev = smu_i2c->adev; + struct smu_context *smu = adev->powerplay.pp_handle; + struct smu_table_context *smu_table = &smu->smu_table; + struct smu_table *table = &smu_table->driver_table; + SwI2cRequest_t *req, *res = (SwI2cRequest_t *)table->cpu_addr; + int i, j, r, c; + u16 dir; + + if (!adev->pm.dpm_enabled) + return -EBUSY; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->I2CcontrollerPort = smu_i2c->port; + req->I2CSpeed = I2C_SPEED_FAST_400K; + req->SlaveAddress = msg[0].addr << 1; /* wants an 8-bit address */ + dir = msg[0].flags & I2C_M_RD; + + for (c = i = 0; i < num_msgs; i++) { + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &req->SwI2cCmds[c]; + + if (!(msg[i].flags & I2C_M_RD)) { + /* write */ + cmd->CmdConfig |= CMDCONFIG_READWRITE_MASK; + cmd->ReadWriteData = msg[i].buf[j]; + } + + if ((dir ^ msg[i].flags) & 
I2C_M_RD) { + /* The direction changes. + */ + dir = msg[i].flags & I2C_M_RD; + cmd->CmdConfig |= CMDCONFIG_RESTART_MASK; + } + + req->NumCmds++; + + /* + * Insert STOP if we are at the last byte of either last + * message for the transaction or the client explicitly + * requires a STOP at this particular message. + */ + if ((j == msg[i].len - 1) && + ((i == num_msgs - 1) || (msg[i].flags & I2C_M_STOP))) { + cmd->CmdConfig &= ~CMDCONFIG_RESTART_MASK; + cmd->CmdConfig |= CMDCONFIG_STOP_MASK; + } + } + } + mutex_lock(&adev->pm.mutex); + r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true); + mutex_unlock(&adev->pm.mutex); + if (r) + goto fail; + + for (c = i = 0; i < num_msgs; i++) { + if (!(msg[i].flags & I2C_M_RD)) { + c += msg[i].len; + continue; + } + for (j = 0; j < msg[i].len; j++, c++) { + SwI2cCmd_t *cmd = &res->SwI2cCmds[c]; + + msg[i].buf[j] = cmd->ReadWriteData; + } + } + r = num_msgs; +fail: + kfree(req); + return r; +} + +static u32 smu_v13_0_0_i2c_func(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + +static const struct i2c_algorithm smu_v13_0_0_i2c_algo = { + .master_xfer = smu_v13_0_0_i2c_xfer, + .functionality = smu_v13_0_0_i2c_func, +}; + +static const struct i2c_adapter_quirks smu_v13_0_0_i2c_control_quirks = { + .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN, + .max_read_len = MAX_SW_I2C_COMMANDS, + .max_write_len = MAX_SW_I2C_COMMANDS, + .max_comb_1st_msg_len = 2, + .max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2, +}; + +static int smu_v13_0_0_i2c_control_init(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int res, i; + + for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { + struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; + struct i2c_adapter *control = &smu_i2c->adapter; + + smu_i2c->adev = adev; + smu_i2c->port = i; + mutex_init(&smu_i2c->mutex); + control->owner = THIS_MODULE; + control->class = I2C_CLASS_SPD; + control->dev.parent = &adev->pdev->dev; + control->algo = &smu_v13_0_0_i2c_algo; + snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i); + control->quirks = &smu_v13_0_0_i2c_control_quirks; + i2c_set_adapdata(control, smu_i2c); + + res = i2c_add_adapter(control); + if (res) { + DRM_ERROR("Failed to register hw i2c, err: %d\n", res); + goto Out_err; + } + } + + /* assign the buses used for the FRU EEPROM and RAS EEPROM */ + /* XXX ideally this would be something in a vbios data table */ + adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[1].adapter; + adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; + + return 0; +Out_err: + for ( ; i >= 0; i--) { + struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; + struct i2c_adapter *control = &smu_i2c->adapter; + + i2c_del_adapter(control); + } + return res; +} + +static void smu_v13_0_0_i2c_control_fini(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + int i; + + for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { + struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; + struct i2c_adapter *control = &smu_i2c->adapter; + + i2c_del_adapter(control); + } + adev->pm.ras_eeprom_i2c_bus = NULL; + adev->pm.fru_eeprom_i2c_bus = NULL; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, + .i2c_init = smu_v13_0_0_i2c_control_init, + .i2c_fini = smu_v13_0_0_i2c_control_fini, .is_dpm_running = smu_v13_0_0_is_dpm_running, .dump_pptable = 
smu_v13_0_0_dump_pptable, .init_microcode = smu_v13_0_init_microcode, From 25e751642a38204da189e0e239055702caeb461c Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jul 2022 17:15:19 -0400 Subject: [PATCH 067/134] drm/amd/display: Move predict pipe to dml fpu folder The function dcn32_predict_pipe_split uses FPU operations. This commit moves this function to the dcn32_fpu file, and we ensure that we only invoke it under the kernel_fpu protection. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 2 + .../drm/amd/display/dc/dcn32/dcn32_resource.h | 2 - .../display/dc/dcn32/dcn32_resource_helpers.c | 33 ---------------- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 39 ++++++++++++++++++- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 4 ++ 5 files changed, 44 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index efbae88c492a5..c5ba411d12ba2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -3053,7 +3053,9 @@ int dcn32_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; } + DC_FP_START(); is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, pipes[i].pipe, i); + DC_FP_END(); pipe_cnt++; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 10254ab7e9d9f..901aa7e13bd2e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -100,8 +100,6 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, bool dcn32_subvp_in_use(struct dc *dc, struct dc_state *context); -bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index); - void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 633d3ee18cfa3..796e3d966a766 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -153,39 +153,6 @@ bool dcn32_subvp_in_use(struct dc *dc, return false; } -bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index) -{ - double pscl_throughput, pscl_throughput_chroma, dpp_clk_single_dpp, clock, - clk_frequency = 0.0, vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz; - - dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio, - pipe.scale_ratio_depth.hscl_ratio_c, - pipe.scale_ratio_depth.vscl_ratio, - pipe.scale_ratio_depth.vscl_ratio_c, - context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk, - context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk, - pipe.dest.pixel_rate_mhz, - pipe.src.source_format, - pipe.scale_taps.htaps, - pipe.scale_taps.htaps_c, - pipe.scale_taps.vtaps, - pipe.scale_taps.vtaps_c, - - /* Output */ - &pscl_throughput, &pscl_throughput_chroma, - &dpp_clk_single_dpp); - - clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100); - - if (clock > 0) - clk_frequency = vco_speed * 4.0 / ((int) (vco_speed * 4.0)); - - if 
(clk_frequency > context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz) - return true; - else - return false; -} - void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 253ff9659b0d1..1b9e34f1232ab 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -24,7 +24,7 @@ * */ #include "dcn32_fpu.h" - +#include "display_mode_vba_util_32.h" // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" @@ -154,3 +154,40 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, } } +bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index) +{ + double pscl_throughput; + double pscl_throughput_chroma; + double dpp_clk_single_dpp, clock; + double clk_frequency = 0.0; + double vco_speed = context->bw_ctx.dml.soc.dispclk_dppclk_vco_speed_mhz; + + dc_assert_fp_enabled(); + + dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(pipe.scale_ratio_depth.hscl_ratio, + pipe.scale_ratio_depth.hscl_ratio_c, + pipe.scale_ratio_depth.vscl_ratio, + pipe.scale_ratio_depth.vscl_ratio_c, + context->bw_ctx.dml.ip.max_dchub_pscl_bw_pix_per_clk, + context->bw_ctx.dml.ip.max_pscl_lb_bw_pix_per_clk, + pipe.dest.pixel_rate_mhz, + pipe.src.source_format, + pipe.scale_taps.htaps, + pipe.scale_taps.htaps_c, + pipe.scale_taps.vtaps, + pipe.scale_taps.vtaps_c, + /* Output */ + &pscl_throughput, &pscl_throughput_chroma, + &dpp_clk_single_dpp); + + clock = dpp_clk_single_dpp * (1 + context->bw_ctx.dml.soc.dcn_downspread_percent / 100); + + if (clock > 0) + clk_frequency = vco_speed * 4.0 / ((int)(vco_speed * 4.0)); + + if (clk_frequency > context->bw_ctx.dml.soc.clock_limits[index].dppclk_mhz) + return true; + else + return false; +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 492f99b6d5619..d5f157cdd0b4f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -36,4 +36,8 @@ void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, display_e2e_pipe_params_st *pipes, int pipe_cnt); +bool dcn32_predict_pipe_split(struct dc_state *context, + display_pipe_params_st pipe, + int index); + #endif From 34a65beccdea16a12d862e94d004ccf00de8cb3b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Wed, 20 Jul 2022 20:49:17 +0800 Subject: [PATCH 068/134] drm/amd/pm: update driver if header for smu_13_0_7 update driver if header for smu_13_0_7 Signed-off-by: Kenneth Feng Acked-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../inc/pmfw_if/smu13_driver_if_v13_0_7.h | 24 ++++++++++++------- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index 132da684e3799..25c08f963f499 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -25,10 +25,10 @@ // *** IMPORTANT *** // PMFW TEAM: Always increment the interface version on any change to this file -#define SMU13_DRIVER_IF_VERSION 0x2A +#define SMU13_DRIVER_IF_VERSION 0x2C //Increment 
this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x1E +#define PPTABLE_VERSION 0x20 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SOCCLK_DPM_LEVELS 8 @@ -152,6 +152,7 @@ typedef enum { #define DEBUG_OVERRIDE_DISABLE_DFLL 0x00000200 #define DEBUG_OVERRIDE_ENABLE_RLC_VF_BRINGUP_MODE 0x00000400 #define DEBUG_OVERRIDE_DFLL_MASTER_MODE 0x00000800 +#define DEBUG_OVERRIDE_ENABLE_PROFILING_MODE 0x00001000 // VR Mapping Bit Defines #define VR_MAPPING_VR_SELECT_MASK 0x01 @@ -1014,8 +1015,8 @@ typedef struct { uint16_t Vmin_Hot_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at hot. uint16_t Vmin_Cold_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at cold. uint16_t Vmin_Aging_Offset[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Worst-case aging margin - uint16_t Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot - uint16_t Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold + uint16_t Spare_Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot + uint16_t Spare_Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold //This is a fixed/minimum VMIN aging degradation offset which is applied at T0. This reflects the minimum amount of aging already accounted for. uint16_t VcBtcFixedVminAgingOffset[PMFW_VOLT_PLANE_COUNT]; @@ -1081,11 +1082,15 @@ typedef struct { uint16_t GfxclkFreqGfxUlv; // in MHz uint8_t GfxIdlePadding2[2]; - - uint32_t GfxoffSpare[16]; + uint32_t GfxOffEntryHysteresis; //For RLC to count after it enters CGCG, and before triggers GFXOFF entry + uint32_t GfxoffSpare[15]; // GFX GPO - uint32_t GfxGpoSpare[16]; + float DfllBtcMasterScalerM; + int32_t DfllBtcMasterScalerB; + float DfllBtcSlaveScalerM; + int32_t DfllBtcSlaveScalerB; + uint32_t GfxGpoSpare[12]; // GFX DCS @@ -1326,8 +1331,11 @@ typedef struct { uint32_t PostVoltageSetBacoDelay; // in microseconds. Amount of time FW will wait after power good is established or PSI0 command is issued uint32_t BacoEntryDelay; // in milliseconds. Amount of time FW will wait to trigger BACO entry after receiving entry notification from OS + uint8_t FuseWritePowerMuxPresent; + uint8_t FuseWritePadding[3]; + // SECTION: Board Reserved - uint32_t BoardSpare[64]; + uint32_t BoardSpare[63]; // SECTION: Structure Padding diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 038a8956de5b2..3e5838346f022 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -31,7 +31,7 @@ #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2A -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2A +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms From de0246ee9fc95f1072a0d26ec4025a3545f159b8 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 14 Jul 2022 14:45:18 +0800 Subject: [PATCH 069/134] drm/amd/pm: enable GPO feature support for SMU13.0.0 The feature is ready with the latest firmware. 
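For context, GPO here is the GFX DPM power optimizer; like the other SMU 13.0.0 features, it is gated by one bit of the 64-bit allowed-feature mask that the driver reports to the PMFW. A minimal sketch of the mask idiom the hunk below relies on (FEATURE_MASK() mirrors the macro in the smu13 ppt code; the test helper is purely illustrative and not part of the patch):

#include <stdint.h>
#include <stdbool.h>

/* Mirrors the smu13 convention: each FEATURE_*_BIT indexes one bit of
 * the 64-bit allowed-feature mask handed to the PMFW. */
#define FEATURE_MASK(feature) (1ULL << (feature))

/* Illustrative helper: check whether a feature bit was granted. */
static bool feature_allowed(uint64_t feature_mask, unsigned int feature_bit)
{
	return (feature_mask & FEATURE_MASK(feature_bit)) != 0;
}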
Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 2b83191e80064..2b973d76fad1d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -311,6 +311,8 @@ smu_v13_0_0_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_VR0HOT_BIT); + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT); + return 0; } From 25dfc8fab47fb0d7231154e07e20b5fc0221f96e Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 15 Jul 2022 16:48:09 +0800 Subject: [PATCH 070/134] drm/amd/pm: update SMU13.0.0 driver_if header To fit the latest 78.49.0 PMFW. Also, bump the version to 0x2B. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h index 5becfc1bb2ecc..2b672d102c966 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h @@ -973,8 +973,8 @@ typedef struct { uint16_t Vmin_Hot_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at hot. uint16_t Vmin_Cold_Eol[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) End-of-life Vset to be used at cold. uint16_t Vmin_Aging_Offset[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Worst-case aging margin - uint16_t Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot - uint16_t Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold + uint16_t Spare_Vmin_Plat_Offset_Hot[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Hot + uint16_t Spare_Vmin_Plat_Offset_Cold[PMFW_VOLT_PLANE_COUNT]; //In mV(Q2) Platform offset apply to T0 Cold //This is a fixed/minimum VMIN aging degradation offset which is applied at T0. This reflects the minimum amount of aging already accounted for. uint16_t VcBtcFixedVminAgingOffset[PMFW_VOLT_PLANE_COUNT]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 3e5838346f022..72b553618116e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -30,7 +30,7 @@ #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2A +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2B #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms From 326f0672153e0ff5b19f0ff9d6b5f75a64a9c2b5 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 12 Jul 2022 14:23:49 +0800 Subject: [PATCH 071/134] drm/amd/pm: correct smu 13.0.0/7 mp1 state setup Only PP_MP1_STATE_UNLOAD is supported for now. For other mp1 states, we should just ignore them; otherwise, errors will be reported. 
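The two callbacks this patch adds (one per ASIC) are identical, so the shared pattern is worth stating once. A minimal sketch of the same logic as the diff below, with an illustrative function name; it forwards only PP_MP1_STATE_UNLOAD to the common helper and reports success for everything else:

static int sketch_set_mp1_state(struct smu_context *smu,
				enum pp_mp1_state mp1_state)
{
	switch (mp1_state) {
	case PP_MP1_STATE_UNLOAD:
		/* The only mp1 state the PMFW handles today. */
		return smu_cmn_set_mp1_state(smu, mp1_state);
	default:
		/* Silently accept the rest instead of erroring out. */
		return 0;
	}
}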
Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 19 ++++++++++++++++++- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 19 ++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 2b973d76fad1d..931c775fe27ee 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1763,6 +1763,23 @@ static void smu_v13_0_0_i2c_control_fini(struct smu_context *smu) adev->pm.fru_eeprom_i2c_bus = NULL; } +static int smu_v13_0_0_set_mp1_state(struct smu_context *smu, + enum pp_mp1_state mp1_state) +{ + int ret; + + switch (mp1_state) { + case PP_MP1_STATE_UNLOAD: + ret = smu_cmn_set_mp1_state(smu, mp1_state); + break; + default: + /* Ignore others */ + ret = 0; + } + + return ret; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -1829,7 +1846,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .baco_exit = smu_v13_0_baco_exit, .mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported, .mode1_reset = smu_v13_0_mode1_reset, - .set_mp1_state = smu_cmn_set_mp1_state, + .set_mp1_state = smu_v13_0_0_set_mp1_state, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 16eea2de8a2d4..9dd56e73218be 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1550,6 +1550,23 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp return ret; } +static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, + enum pp_mp1_state mp1_state) +{ + int ret; + + switch (mp1_state) { + case PP_MP1_STATE_UNLOAD: + ret = smu_cmn_set_mp1_state(smu, mp1_state); + break; + default: + /* Ignore others */ + ret = 0; + } + + return ret; +} + static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_7_set_default_dpm_table, @@ -1607,7 +1624,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .baco_set_state = smu_v13_0_baco_set_state, .baco_enter = smu_v13_0_baco_enter, .baco_exit = smu_v13_0_baco_exit, - .set_mp1_state = smu_cmn_set_mp1_state, + .set_mp1_state = smu_v13_0_7_set_mp1_state, }; void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) From 42c7de9622b2ec64f7c15c336b75f8933ea7545c Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 18 Jul 2022 10:45:35 +0800 Subject: [PATCH 072/134] drm/amd/pm: revise the driver reloading fix for SMU 13.0.0 and 13.0.7 The current approach breaks S3/S4, since an asic reset is needed for them and putting the SMU out of service (via SMU_MSG_PrepareMp1ForUnload) makes that asic reset fail. With current designs, driver reloading also involves an asic reset, which brings the asic back to a clean state, so the SMU_MSG_PrepareMp1ForUnload operation is not really necessary. Thus we just drop the SMU_MSG_PrepareMp1ForUnload operation. We may revise the whole driver reloading sequence when there is a better design. 
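For readability, this is the guard that the hunk below deletes from smu_disable_dpms(): on a plain driver unload (neither runtime PM nor a reset in progress) it sent SMU_MSG_PrepareMp1ForUnload, after which the asic reset performed on S3/S4 and on driver reload could no longer succeed.

	if (!(adev->in_runpm || amdgpu_in_reset(adev))) {
		ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
		if (ret) {
			dev_err(adev->dev, "Fail set mp1 state to UNLOAD!\n");
			return ret;
		}
	}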
Fixes: 72aeb6ee0c78 ("drm/amd/pm: fix driver reload SMC firmware fail issue for smu13") Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index fd79b213fab48..6d9b3c6af1646 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1415,13 +1415,6 @@ static int smu_disable_dpms(struct smu_context *smu) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): - if (!(adev->in_runpm || amdgpu_in_reset(adev))) { - ret = smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD); - if (ret) { - dev_err(adev->dev, "Fail set mp1 state to UNLOAD!\n"); - return ret; - } - } return 0; default: break; From 2207efdd8388bd300a0051b1775705d890abd306 Mon Sep 17 00:00:00 2001 From: Chengming Gui Date: Fri, 15 Jul 2022 13:12:14 +0800 Subject: [PATCH 073/134] drm/amd/amdgpu: add TAP_DELAYS upload support for gfx10 Support {GLOBAL/SE0/SE1/SE2/SE3}_TAP_DELAYS uploading. v2: upload TAP_DELAYS before RLC autoload was triggered. (Hawking) Signed-off-by: Chengming Gui Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 15 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h | 10 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 30 +++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 20 +++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 53 ++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 2 + 6 files changed, 129 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 6540582ecbf82..3ee363bfbac21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2168,6 +2168,21 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_RLC_DRAM: *type = GFX_FW_TYPE_RLC_DRAM_BOOT; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + *type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + *type = GFX_FW_TYPE_SE0_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + *type = GFX_FW_TYPE_SE1_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + *type = GFX_FW_TYPE_SE2_TAP_DELAYS; + break; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + *type = GFX_FW_TYPE_SE3_TAP_DELAYS; + break; case AMDGPU_UCODE_ID_SMC: *type = GFX_FW_TYPE_SMU; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index f6fd9e1a7dac4..03ac36b2c2cfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -222,6 +222,11 @@ struct amdgpu_rlc { u32 rlc_dram_ucode_size_bytes; u32 rlcp_ucode_size_bytes; u32 rlcv_ucode_size_bytes; + u32 global_tap_delays_ucode_size_bytes; + u32 se0_tap_delays_ucode_size_bytes; + u32 se1_tap_delays_ucode_size_bytes; + u32 se2_tap_delays_ucode_size_bytes; + u32 se3_tap_delays_ucode_size_bytes; u32 *register_list_format; u32 *register_restore; @@ -232,6 +237,11 @@ struct amdgpu_rlc { u8 *rlc_dram_ucode; u8 *rlcp_ucode; u8 *rlcv_ucode; + u8 *global_tap_delays_ucode; + u8 *se0_tap_delays_ucode; + u8 *se1_tap_delays_ucode; + u8 *se2_tap_delays_ucode; + u8 *se3_tap_delays_ucode; bool is_rlc_v2_1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index c312577df596f..939c8614f0e33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -561,6 +561,16 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "RLC_P"; case AMDGPU_UCODE_ID_RLC_V: return "RLC_V"; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + return "GLOBAL_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + return "SE0_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + return "SE1_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + return "SE2_TAP_DELAYS"; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + return "SE3_TAP_DELAYS"; case AMDGPU_UCODE_ID_IMU_I: return "IMU_I"; case AMDGPU_UCODE_ID_IMU_D: @@ -745,6 +755,26 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes; ucode_addr = adev->gfx.rlc.rlcv_ucode; break; + case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.global_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE0_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE1_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE2_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode; + break; + case AMDGPU_UCODE_ID_SE3_TAP_DELAYS: + ucode->ucode_size = adev->gfx.rlc.se3_tap_delays_ucode_size_bytes; + ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode; + break; case AMDGPU_UCODE_ID_CP_MES: ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes); ucode_addr = (u8 *)ucode->fw->data + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index f510b6aa82ab7..ebed3f5226dba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -266,6 +266,21 @@ struct rlc_firmware_header_v2_3 { uint32_t rlcv_ucode_offset_bytes; }; +/* version_major=2, version_minor=4 */ +struct rlc_firmware_header_v2_4 { + struct rlc_firmware_header_v2_3 v2_3; + uint32_t global_tap_delays_ucode_size_bytes; + uint32_t global_tap_delays_ucode_offset_bytes; + uint32_t se0_tap_delays_ucode_size_bytes; + uint32_t se0_tap_delays_ucode_offset_bytes; + uint32_t se1_tap_delays_ucode_size_bytes; + uint32_t se1_tap_delays_ucode_offset_bytes; + uint32_t se2_tap_delays_ucode_size_bytes; + uint32_t se2_tap_delays_ucode_offset_bytes; + uint32_t se3_tap_delays_ucode_size_bytes; + uint32_t se3_tap_delays_ucode_offset_bytes; +}; + /* version_major=1, version_minor=0 */ struct sdma_firmware_header_v1_0 { struct common_firmware_header header; @@ -426,6 +441,11 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_CP_MES1_DATA, AMDGPU_UCODE_ID_IMU_I, AMDGPU_UCODE_ID_IMU_D, + AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS, + AMDGPU_UCODE_ID_SE0_TAP_DELAYS, + AMDGPU_UCODE_ID_SE1_TAP_DELAYS, + AMDGPU_UCODE_ID_SE2_TAP_DELAYS, + AMDGPU_UCODE_ID_SE3_TAP_DELAYS, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM, AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 5820c3f0e2150..fafbad3cf08d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3976,6 +3976,23 @@ static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev) adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes); } +static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev) +{ + const struct rlc_firmware_header_v2_4 *rlc_hdr; + + rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes); + adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes); + adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes); + adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes); +} + static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) { bool ret = false; @@ -4153,8 +4170,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (version_major == 2) { if (version_minor >= 1) gfx_v10_0_init_rlc_ext_microcode(adev); - if (version_minor == 2) + if (version_minor >= 2) gfx_v10_0_init_rlc_iram_dram_microcode(adev); + if (version_minor == 4) { + gfx_v10_0_init_tap_delays_microcode(adev); + } } } @@ -4251,8 +4271,39 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE); } + } + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS]; + info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS; + info->fw = adev->gfx.rlc_fw; + adev->firmware.fw_size += + ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; diff --git 
a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 236b7a61443a4..22c775f39119b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -259,6 +259,8 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */ GFX_FW_TYPE_CAP = 62, /* CAP_FW */ + GFX_FW_TYPE_SE2_TAP_DELAYS = 65, /* SE2 TAP DELAYS NV */ + GFX_FW_TYPE_SE3_TAP_DELAYS = 66, /* SE3 TAP DELAYS NV */ GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */ GFX_FW_TYPE_IMU_I = 68, /* IMU Instruction FW SOC21 */ GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */ From 44998fbdcdc39253cb10293c3c27dabc040ad7e8 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 5 Apr 2022 10:42:47 -0400 Subject: [PATCH 074/134] drm/amdgpu: Refactor code to handle non coherent and uncached This simplifies existing coherence handling for Arcturus and Aldebaran to account for !coherent && uncached scenarios. Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 51 +++++++++---------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d2d2b16c4d504..08997092e7f74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -410,45 +410,42 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) switch (adev->asic_type) { case CHIP_ARCTURUS: - if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - if (bo_adev == adev) - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - else - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } - break; case CHIP_ALDEBARAN: - if (coherent && uncached) { - if (adev->gmc.xgmi.connected_to_cpu || - !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) - snoop = true; - mapping_flags |= AMDGPU_VM_MTYPE_UC; - } else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (bo_adev == adev) { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - if (adev->gmc.xgmi.connected_to_cpu) + if (uncached) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else if (coherent) + mapping_flags |= AMDGPU_VM_MTYPE_CC; + else + mapping_flags |= AMDGPU_VM_MTYPE_RW; + if (adev->asic_type == CHIP_ALDEBARAN && + adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } } else { + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; snoop = true; - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } break; default: - mapping_flags |= coherent ? 
- AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; + if (uncached || coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + + if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) + snoop = true; } pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags); From 34a1b0f90e33b00b431d6e8636b787111c6c29d5 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 09:30:43 -0400 Subject: [PATCH 075/134] drm/amd/display: Move insert entry table to the FPU code The insert_entry_into_table_sorted function uses FPU operation and calls other static functions support. This commit moves the insert entry function with all the required struct and static functions to the FPU file. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 188 +----------------- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 3 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 186 +++++++++++++++++ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 9 + 4 files changed, 208 insertions(+), 178 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index c5ba411d12ba2..d52192ab9421b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -115,137 +115,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define DC_LOGGER_INIT(logger) -#define DCN3_2_DEFAULT_DET_SIZE 256 -#define DCN3_2_MAX_DET_SIZE 1152 -#define DCN3_2_MIN_DET_SIZE 128 -#define DCN3_2_MIN_COMPBUF_SIZE_KB 128 - -struct _vcs_dpi_ip_params_st dcn3_2_ip = { - .gpuvm_enable = 0, - .gpuvm_max_page_table_levels = 4, - .hostvm_enable = 0, - .rob_buffer_size_kbytes = 128, - .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1280, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 22, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, match c code from hw team - .min_pixel_chunk_size_bytes = 1024, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 6016, - .dsc422_native_support = true, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 57, - .line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm still shows as 986880 bits with 48 bpp - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - 
.writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 125, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, - .max_num_dp2p0_outputs = 2, - .max_num_dp2p0_streams = 4, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { - .clock_limits = { - { - .state = 0, - .dcfclk_mhz = 1564.0, - .fabricclk_mhz = 400.0, - .dispclk_mhz = 2150.0, - .dppclk_mhz = 2150.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .phyclk_d32_mhz = 625.0, - .socclk_mhz = 1200.0, - .dscclk_mhz = 716.667, - .dram_speed_mts = 1600.0, - .dtbclk_mhz = 1564.0, - }, - }, - .num_states = 1, - .sr_exit_time_us = 5.20, - .sr_enter_plus_exit_time_us = 9.60, - .sr_exit_z8_time_us = 285.0, - .sr_enter_plus_exit_z8_time_us = 320, - .writeback_latency_us = 12.0, - .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .fclk_change_latency_us = 20, - .usr_retraining_latency_us = 2, - .smn_latency_us = 2, - .mall_allocated_for_dcn_mbytes = 64, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 100.0, - .pct_ideal_fabric_bw_after_urgent = 67.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_bw_after_urgent_strobe = 67.0, - .max_avg_sdp_bw_use_normal_percent = 80.0, - .max_avg_fabric_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_strobe_percent = 50.0, - .max_avg_dram_bw_use_normal_percent = 15.0, - .num_chans = 8, - .dram_channel_width_bytes = 2, - .fabric_datapath_to_dcn_data_return_bytes = 64, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .dram_clock_change_latency_us = 400, - .dispclk_dppclk_vco_speed_mhz = 4300.0, - .do_urgent_latency_adjustment = true, - .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, -}; - enum dcn32_clk_src_array_id { DCN32_CLK_SRC_PLL0, DCN32_CLK_SRC_PLL1, @@ -3454,53 +3323,6 @@ static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) } } -static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) -{ - float memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - float fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); - - float sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); - - float limiting_bw_kbytes_sec = memory_bw_kbytes_sec; - - if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) - 
limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; - - if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) - limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; - - return limiting_bw_kbytes_sec; -} - -static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - struct _vcs_dpi_voltage_scaling_st *entry) -{ - int index = 0; - int i = 0; - float net_bw_of_new_state = 0; - - if (*num_entries == 0) { - table[0] = *entry; - (*num_entries)++; - } else { - net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); - while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { - index++; - if (index >= *num_entries) - break; - } - - for (i = *num_entries; i > index; i--) { - table[i] = table[i - 1]; - } - - table[index] = *entry; - (*num_entries)++; - } -} - static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, unsigned int index) { @@ -3585,7 +3407,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.dram_speed_mts = 0; get_optimal_ntuple(&entry); + DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // Insert the max DCFCLK @@ -3594,7 +3418,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.dram_speed_mts = 0; get_optimal_ntuple(&entry); + DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); // Insert the UCLK DPMS for (i = 0; i < num_uclk_dpms; i++) { @@ -3603,7 +3429,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; get_optimal_ntuple(&entry); + DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // If FCLK is coarse grained, insert individual DPMs. 
@@ -3614,7 +3442,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.dram_speed_mts = 0; get_optimal_ntuple(&entry); + DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } } // If FCLK fine grained, only insert max @@ -3624,7 +3454,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.dram_speed_mts = 0; get_optimal_ntuple(&entry); + DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // At this point, the table contains all "points of interest" based on diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 901aa7e13bd2e..7ccad84b1f161 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -33,6 +33,9 @@ #define TO_DCN32_RES_POOL(pool)\ container_of(pool, struct dcn32_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_2_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc; + struct dcn32_resource_pool { struct resource_pool base; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 1b9e34f1232ab..4223a9a9dd457 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -28,6 +28,132 @@ // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" +struct _vcs_dpi_ip_params_st dcn3_2_ip = { + .gpuvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_enable = 0, + .rob_buffer_size_kbytes = 128, + .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1280, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 22, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .alpha_pixel_chunk_size_kbytes = 4, + .min_pixel_chunk_size_bytes = 1024, + .dcc_meta_buffer_size_bytes = 6272, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 12, + .maximum_pixels_per_line_per_dsc_unit = 6016, + .dsc422_native_support = true, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 57, + .line_buffer_size_bits = 1171920, + .max_line_buffer_lines = 32, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 47, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + 
.dppclk_delay_cnvc_formatter = 28, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 125, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, + .max_num_dp2p0_outputs = 2, + .max_num_dp2p0_streams = 4, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 1564.0, + .fabricclk_mhz = 400.0, + .dispclk_mhz = 2150.0, + .dppclk_mhz = 2150.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .phyclk_d32_mhz = 625.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 716.667, + .dram_speed_mts = 16000.0, + .dtbclk_mhz = 1564.0, + }, + }, + .num_states = 1, + .sr_exit_time_us = 5.20, + .sr_enter_plus_exit_time_us = 9.60, + .sr_exit_z8_time_us = 285.0, + .sr_enter_plus_exit_z8_time_us = 320, + .writeback_latency_us = 12.0, + .round_trip_ping_latency_dcfclk_cycles = 263, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .fclk_change_latency_us = 20, + .usr_retraining_latency_us = 2, + .smn_latency_us = 2, + .mall_allocated_for_dcn_mbytes = 64, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 100.0, + .pct_ideal_fabric_bw_after_urgent = 67.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_bw_after_urgent_strobe = 67.0, + .max_avg_sdp_bw_use_normal_percent = 80.0, + .max_avg_fabric_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_strobe_percent = 50.0, + .max_avg_dram_bw_use_normal_percent = 15.0, + .num_chans = 8, + .dram_channel_width_bytes = 2, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .dram_clock_change_latency_us = 400, + .dispclk_dppclk_vco_speed_mhz = 4300.0, + .do_urgent_latency_adjustment = true, + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, +}; + void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) { /* defaults */ @@ -191,3 +317,63 @@ bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st p return false; } +static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) +{ + float memory_bw_kbytes_sec; + float fabric_bw_kbytes_sec; + float sdp_bw_kbytes_sec; + float limiting_bw_kbytes_sec; + + memory_bw_kbytes_sec = entry->dram_speed_mts * + dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * + ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + fabric_bw_kbytes_sec = entry->fabricclk_mhz * + dcn3_2_soc.return_bus_width_bytes * + ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); + + sdp_bw_kbytes_sec = entry->dcfclk_mhz * + dcn3_2_soc.return_bus_width_bytes * + ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); + + limiting_bw_kbytes_sec = memory_bw_kbytes_sec; + + if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; + + if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; + + return 
limiting_bw_kbytes_sec; +} + +void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry) +{ + int i = 0; + int index = 0; + float net_bw_of_new_state = 0; + + dc_assert_fp_enabled(); + + if (*num_entries == 0) { + table[0] = *entry; + (*num_entries)++; + } else { + net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); + while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { + index++; + if (index >= *num_entries) + break; + } + + for (i = *num_entries; i > index; i--) + table[i] = table[i - 1]; + + table[index] = *entry; + (*num_entries)++; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index d5f157cdd0b4f..62cb0c1d462cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -29,6 +29,11 @@ #include "clk_mgr_internal.h" +#define DCN3_2_DEFAULT_DET_SIZE 256 +#define DCN3_2_MAX_DET_SIZE 1152 +#define DCN3_2_MIN_DET_SIZE 128 +#define DCN3_2_MIN_COMPBUF_SIZE_KB 128 + void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr); void dcn32_helper_populate_phantom_dlg_params(struct dc *dc, @@ -40,4 +45,8 @@ bool dcn32_predict_pipe_split(struct dc_state *context, display_pipe_params_st pipe, int index); +void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry); + #endif From 8f5bb69dd453666ff949079443ebce24381e163c Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 10:28:23 -0400 Subject: [PATCH 076/134] drm/amd/display: Move phantom stream to FPU code This commit moves the phantom stream FPU code to the dcn32_fpu file. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 89 +------------------ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 84 +++++++++++++++++ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 8 ++ 3 files changed, 94 insertions(+), 87 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index d52192ab9421b..aee66c9ffcb00 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1830,93 +1830,6 @@ static void dcn32_enable_phantom_plane(struct dc *dc, } } -/** - * *************************************************************************************** - * dcn32_set_phantom_stream_timing: Set timing params for the phantom stream - * - * Set timing params of the phantom stream based on calculated output from DML. - * This function first gets the DML pipe index using the DC pipe index, then - * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of - * lines required for SubVP MCLK switching and assigns to the phantom stream - * accordingly. - * - * - The number of SubVP lines calculated in DML does not take into account - * FW processing delays and required pstate allow width, so we must include - * that separately. 
- * - * - Set phantom backporch = vstartup of main pipe - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] ref_pipe: Main pipe for the phantom stream - * @param [in] pipes: DML pipe params - * @param [in] pipe_cnt: number of DML pipes - * @param [in] dc_pipe_idx: DC pipe index for the main pipe (i.e. ref_pipe) - * - * @return: void - * - * *************************************************************************************** - */ -static void dcn32_set_phantom_stream_timing(struct dc *dc, - struct dc_state *context, - struct pipe_ctx *ref_pipe, - struct dc_stream_state *phantom_stream, - display_e2e_pipe_params_st *pipes, - unsigned int pipe_cnt, - unsigned int dc_pipe_idx) -{ - unsigned int i, pipe_idx; - struct pipe_ctx *pipe; - uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; - unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel; - unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel]; - - // Find DML pipe index (pipe_idx) using dc_pipe_idx - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->stream) - continue; - - if (i == dc_pipe_idx) - break; - - pipe_idx++; - } - - // Calculate lines required for pstate allow width and FW processing delays - pstate_width_fw_delay_lines = ((double)(dc->caps.subvp_fw_processing_delay_us + - dc->caps.subvp_pstate_allow_width_us) / 1000000) * - (ref_pipe->stream->timing.pix_clk_100hz * 100) / - (double)ref_pipe->stream->timing.h_total; - - // Update clks_cfg for calling into recalculate - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = socclk; - - // DML calculation for MALL region doesn't take into account FW delay - // and required pstate allow width for multi-display cases - phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + - pstate_width_fw_delay_lines; - - // For backporch of phantom pipe, use vstartup of the main pipe - phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - phantom_stream->dst.y = 0; - phantom_stream->dst.height = phantom_vactive; - phantom_stream->src.y = 0; - phantom_stream->src.height = phantom_vactive; - - phantom_stream->timing.v_addressable = phantom_vactive; - phantom_stream->timing.v_front_porch = 1; - phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + - phantom_stream->timing.v_front_porch + - phantom_stream->timing.v_sync_width + - phantom_bp; -} - static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -1938,7 +1851,9 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc, memcpy(&phantom_stream->timing, &ref_pipe->stream->timing, sizeof(phantom_stream->timing)); memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); + DC_FP_START(); dcn32_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream, pipes, pipe_cnt, dc_pipe_idx); + DC_FP_END(); dc_add_stream_to_ctx(dc, context, phantom_stream); return phantom_stream; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 4223a9a9dd457..74ccf453349ca 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -377,3 +377,87 @@ void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, } } +/** + * dcn32_set_phantom_stream_timing: Set timing params for the phantom stream + * + * Set timing params of the phantom stream based on calculated output from DML. + * This function first gets the DML pipe index using the DC pipe index, then + * calls into DML (get_subviewport_lines_needed_in_mall) to get the number of + * lines required for SubVP MCLK switching and assigns to the phantom stream + * accordingly. + * + * - The number of SubVP lines calculated in DML does not take into account + * FW processing delays and required pstate allow width, so we must include + * that separately. + * + * - Set phantom backporch = vstartup of main pipe + * + * @dc: current dc state + * @context: new dc state + * @ref_pipe: Main pipe for the phantom stream + * @pipes: DML pipe params + * @pipe_cnt: number of DML pipes + * @dc_pipe_idx: DC pipe index for the main pipe (i.e. ref_pipe) + */ +void dcn32_set_phantom_stream_timing(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream, + display_e2e_pipe_params_st *pipes, + unsigned int pipe_cnt, + unsigned int dc_pipe_idx) +{ + unsigned int i, pipe_idx; + struct pipe_ctx *pipe; + uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines; + unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel; + unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel]; + + dc_assert_fp_enabled(); + + // Find DML pipe index (pipe_idx) using dc_pipe_idx + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (i == dc_pipe_idx) + break; + + pipe_idx++; + } + + // Calculate lines required for pstate allow width and FW processing delays + pstate_width_fw_delay_lines = ((double)(dc->caps.subvp_fw_processing_delay_us + + dc->caps.subvp_pstate_allow_width_us) / 1000000) * + (ref_pipe->stream->timing.pix_clk_100hz * 100) / + (double)ref_pipe->stream->timing.h_total; + + // Update clks_cfg for calling into recalculate + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = socclk; + + // DML calculation for MALL region doesn't take into account FW delay + // and required pstate allow width for multi-display cases + phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + + pstate_width_fw_delay_lines; + + // For backporch of phantom pipe, use vstartup of the main pipe + phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + phantom_stream->dst.y = 0; + phantom_stream->dst.height = phantom_vactive; + phantom_stream->src.y = 0; + phantom_stream->src.height = phantom_vactive; + + phantom_stream->timing.v_addressable = phantom_vactive; + phantom_stream->timing.v_front_porch = 1; + phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + + phantom_stream->timing.v_front_porch + + phantom_stream->timing.v_sync_width + + phantom_bp; +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 62cb0c1d462cb..4abef908dca90 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h 
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -49,4 +49,12 @@ void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, struct _vcs_dpi_voltage_scaling_st *entry); +void dcn32_set_phantom_stream_timing(struct dc *dc, + struct dc_state *context, + struct pipe_ctx *ref_pipe, + struct dc_stream_state *phantom_stream, + display_e2e_pipe_params_st *pipes, + unsigned int pipe_cnt, + unsigned int dc_pipe_idx); + #endif From 4cef2269318cdf5beb70c257f7aaba267f2157ae Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 10:51:01 -0400 Subject: [PATCH 077/134] drm/amd/display: Move SubVP functions to dcn32_fpu Much of the code related to SubVP uses FPU operations, and many static functions are part of this feature. This commit is a little large, but it only moves SubVP operations from one file to another; it had to be done in a single change due to dependencies between the functions. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 597 +----------------- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 2 + .../display/dc/dcn32/dcn32_resource_helpers.c | 11 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 597 ++++++++++++++++++ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 8 + 5 files changed, 620 insertions(+), 595 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index aee66c9ffcb00..fb48293fa04ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1624,171 +1624,6 @@ bool dcn32_release_post_bldn_3dlut( return ret; } -/** - ******************************************************************************************** - * dcn32_get_num_free_pipes: Calculate number of free pipes - * - * This function assumes that a "used" pipe is a pipe that has - * both a stream and a plane assigned to it. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: Number of free pipes available in the context - * - ******************************************************************************************** - */ -static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state *context) -{ - unsigned int i; - unsigned int free_pipes = 0; - unsigned int num_pipes = 0; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->stream && !pipe->top_pipe) { - while (pipe) { - num_pipes++; - pipe = pipe->bottom_pipe; - } - } - } - - free_pipes = dc->res_pool->pipe_count - num_pipes; - return free_pipes; - } - -/** - ******************************************************************************************** - * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP. - * - * We enter this function if we are Sub-VP capable (i.e. enough pipes available) - * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if - * we are forcing SubVP P-State switching on the current config. - * - * The number of pipes used for the chosen surface must be less than or equal to the - * number of free pipes available. - * - * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK). 
- * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own - * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't - * support MCLK switching naturally [i.e. ACTIVE or VBLANK]). - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [out] index: dc pipe index for the pipe chosen to have phantom pipes assigned - * - * @return: True if a valid pipe assignment was found for Sub-VP. Otherwise false. - * - ******************************************************************************************** - */ - -static bool dcn32_assign_subvp_pipe(struct dc *dc, - struct dc_state *context, - unsigned int *index) -{ - unsigned int i, pipe_idx; - unsigned int max_frame_time = 0; - bool valid_assignment_found = false; - unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context); - bool current_assignment_freesync = false; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - unsigned int num_pipes = 0; - - if (!pipe->stream) - continue; - - if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - while (pipe) { - num_pipes++; - pipe = pipe->bottom_pipe; - } - - pipe = &context->res_ctx.pipe_ctx[i]; - if (num_pipes <= free_pipes) { - struct dc_stream_state *stream = pipe->stream; - unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total / - (double)(stream->timing.pix_clk_100hz * 100)) * 1000000; - if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) { - *index = i; - max_frame_time = frame_us; - valid_assignment_found = true; - current_assignment_freesync = false; - /* For the 2-Freesync display case, still choose the one with the - * longest frame time - */ - } else if (stream->ignore_msa_timing_param && (!valid_assignment_found || - (current_assignment_freesync && frame_us > max_frame_time))) { - *index = i; - valid_assignment_found = true; - current_assignment_freesync = true; - } - } - } - pipe_idx++; - } - return valid_assignment_found; -} - -/** - * *************************************************************************************** - * dcn32_enough_pipes_for_subvp: Function to check if there are "enough" pipes for SubVP. - * - * This function returns true if there are enough free pipes - * to create the required phantom pipes for any given stream - * (that does not already have phantom pipe assigned). - * - * e.g. For a 2 stream config where the first stream uses one - * pipe and the second stream uses 2 pipes (i.e. pipe split), - * this function will return true because there is 1 remaining - * pipe which can be used as the phantom pipe for the non pipe - * split pipe. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: True if there are enough free pipes to assign phantom pipes to at least one - * stream that does not already have phantom pipes assigned. Otherwise false. 
- * - * *************************************************************************************** - */ -static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context) -{ - unsigned int i, split_cnt, free_pipes; - unsigned int min_pipe_split = dc->res_pool->pipe_count + 1; // init as max number of pipes + 1 - bool subvp_possible = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - // Find the minimum pipe split count for non SubVP pipes - if (pipe->stream && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - split_cnt = 0; - while (pipe) { - split_cnt++; - pipe = pipe->bottom_pipe; - } - - if (split_cnt < min_pipe_split) - min_pipe_split = split_cnt; - } - } - - free_pipes = dcn32_get_num_free_pipes(dc, context); - - // SubVP only possible if at least one pipe is being used (i.e. free_pipes - // should not equal to the pipe_count) - if (free_pipes >= min_pipe_split && free_pipes < dc->res_pool->pipe_count) - subvp_possible = true; - - return subvp_possible; -} - static void dcn32_enable_phantom_plane(struct dc *dc, struct dc_state *context, struct dc_stream_state *phantom_stream, @@ -2033,436 +1868,6 @@ static struct pipe_ctx *dcn32_find_split_pipe( return pipe; } - -/** - * *************************************************************************************** - * subvp_subvp_schedulable: Determine if SubVP + SubVP config is schedulable - * - * High level algorithm: - * 1. Find longest microschedule length (in us) between the two SubVP pipes - * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both - * pipes still allows for the maximum microschedule to fit in the active - * region for both pipes. - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: bool - True if the SubVP + SubVP config is schedulable, false otherwise - * - * *************************************************************************************** - */ -static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context) -{ - struct pipe_ctx *subvp_pipes[2]; - struct dc_stream_state *phantom = NULL; - uint32_t microschedule_lines = 0; - uint32_t index = 0; - uint32_t i; - uint32_t max_microschedule_us = 0; - int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - uint32_t time_us = 0; - - /* Loop to calculate the maximum microschedule time between the two SubVP pipes, - * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. 
- */ - if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) { - phantom = pipe->stream->mall_stream_config.paired_stream; - microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + - phantom->timing.v_addressable; - - // Round up when calculating microschedule time (+ 1 at the end) - time_us = (microschedule_lines * phantom->timing.h_total) / - (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us + - dc->caps.subvp_fw_processing_delay_us + 1; - if (time_us > max_microschedule_us) - max_microschedule_us = time_us; - - subvp_pipes[index] = pipe; - index++; - - // Maximum 2 SubVP pipes - if (index == 2) - break; - } - } - vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) / - (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) / - (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) * - subvp_pipes[0]->stream->timing.h_total) / - (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; - vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) * - subvp_pipes[1]->stream->timing.h_total) / - (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; - - if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && - (vactive2_us - vblank1_us) / 2 > max_microschedule_us) - return true; - - return false; -} - -/** - * *************************************************************************************** - * subvp_drr_schedulable: Determine if SubVP + DRR config is schedulable - * - * High level algorithm: - * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe - * 2. 
Determine the frame time for the DRR display when adding required margin for MCLK switching - * (the margin is equal to the MALL region + DRR margin (500us)) - * 3.If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame)) - * then report the configuration as supported - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] drr_pipe: DRR pipe_ctx for the SubVP + DRR config - * - * @return: bool - True if the SubVP + DRR config is schedulable, false otherwise - * - * *************************************************************************************** - */ -static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context, struct pipe_ctx *drr_pipe) -{ - bool schedulable = false; - uint32_t i; - struct pipe_ctx *pipe = NULL; - struct dc_crtc_timing *main_timing = NULL; - struct dc_crtc_timing *phantom_timing = NULL; - struct dc_crtc_timing *drr_timing = NULL; - int16_t prefetch_us = 0; - int16_t mall_region_us = 0; - int16_t drr_frame_us = 0; // nominal frame time - int16_t subvp_active_us = 0; - int16_t stretched_drr_us = 0; - int16_t drr_stretched_vblank_us = 0; - int16_t max_vblank_mallregion = 0; - - // Find SubVP pipe - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - - // We check for master pipe, but it shouldn't matter since we only need - // the pipe for timing info (stream should be same for any pipe splits) - if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) - continue; - - // Find the SubVP pipe - if (pipe->stream->mall_stream_config.type == SUBVP_MAIN) - break; - } - - main_timing = &pipe->stream->timing; - phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing; - drr_timing = &drr_pipe->stream->timing; - prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us; - subvp_active_us = main_timing->v_addressable * main_timing->h_total / - (double)(main_timing->pix_clk_100hz * 100) * 1000000; - drr_frame_us = drr_timing->v_total * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000; - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; - stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US; - drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total / - (double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us); - max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us; - - /* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the - * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis - * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - * and the max of (VBLANK blanking time, MALL region)). 
- */ - if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 && - subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0) - schedulable = true; - - return schedulable; -} - -/** - * *************************************************************************************** - * subvp_vblank_schedulable: Determine if SubVP + VBLANK config is schedulable - * - * High level algorithm: - * 1. Get timing for SubVP pipe, phantom pipe, and VBLANK pipe - * 2. If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time)) - * then report the configuration as supported - * 3. If the VBLANK display is DRR, then take the DRR static schedulability path - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * - * @return: bool - True if the SubVP + VBLANK/DRR config is schedulable, false otherwise - * - * *************************************************************************************** - */ -static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) -{ - struct pipe_ctx *pipe = NULL; - struct pipe_ctx *subvp_pipe = NULL; - bool found = false; - bool schedulable = false; - uint32_t i = 0; - uint8_t vblank_index = 0; - uint16_t prefetch_us = 0; - uint16_t mall_region_us = 0; - uint16_t vblank_frame_us = 0; - uint16_t subvp_active_us = 0; - uint16_t vblank_blank_us = 0; - uint16_t max_vblank_mallregion = 0; - struct dc_crtc_timing *main_timing = NULL; - struct dc_crtc_timing *phantom_timing = NULL; - struct dc_crtc_timing *vblank_timing = NULL; - - /* For SubVP + VBLANK/DRR cases, we assume there can only be - * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK - * is supported, it is either a single VBLANK case or two VBLANK - * displays which are synchronized (in which case they have identical - * timings). - */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - - // We check for master pipe, but it shouldn't matter since we only need - // the pipe for timing info (stream should be same for any pipe splits) - if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) - continue; - - if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) { - // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). 
- vblank_index = i; - found = true; - } - - if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN) - subvp_pipe = pipe; - } - // Use ignore_msa_timing_param flag to identify as DRR - if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) { - // SUBVP + DRR case - schedulable = subvp_drr_schedulable(dc, context, &context->res_ctx.pipe_ctx[vblank_index]); - } else if (found) { - main_timing = &subvp_pipe->stream->timing; - phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; - vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; - // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe - // Also include the prefetch end to mallstart delay time - prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + - dc->caps.subvp_prefetch_end_to_mall_start_us; - // P-State allow width and FW delays already included phantom_timing->v_addressable - mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / - (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; - vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total / - (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; - vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total / - (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; - subvp_active_us = main_timing->v_addressable * main_timing->h_total / - (double)(main_timing->pix_clk_100hz * 100) * 1000000; - max_vblank_mallregion = vblank_blank_us > mall_region_us ? vblank_blank_us : mall_region_us; - - // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, - // and the max of (VBLANK blanking time, MALL region) - // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0) - if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) - schedulable = true; - } - return schedulable; -} - -/** - * ******************************************************************************************** - * subvp_validate_static_schedulability: Check which SubVP case is calculated and handle - * static analysis based on the case. - * - * Three cases: - * 1. SubVP + SubVP - * 2. SubVP + VBLANK (DRR checked internally) - * 3. 
SubVP + VACTIVE (currently unsupported) - * - * @param [in] dc: current dc state - * @param [in] context: new dc state - * @param [in] vlevel: Voltage level calculated by DML - * - * @return: bool - True if statically schedulable, false otherwise - * - * ******************************************************************************************** - */ -static bool subvp_validate_static_schedulability(struct dc *dc, - struct dc_state *context, - int vlevel) -{ - bool schedulable = true; // true by default for single display case - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - uint32_t i, pipe_idx; - uint8_t subvp_count = 0; - uint8_t vactive_count = 0; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->stream) - continue; - - if (pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_MAIN) - subvp_count++; - - // Count how many planes are capable of VACTIVE switching (SubVP + VACTIVE unsupported) - if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0) { - vactive_count++; - } - pipe_idx++; - } - - if (subvp_count == 2) { - // Static schedulability check for SubVP + SubVP case - schedulable = subvp_subvp_schedulable(dc, context); - } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp) { - // Static schedulability check for SubVP + VBLANK case. Also handle the case where - // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK) - if (vactive_count > 0) - schedulable = false; - else - schedulable = subvp_vblank_schedulable(dc, context); - } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vactive_w_mall_sub_vp) { - // SubVP + VACTIVE currently unsupported - schedulable = false; - } - return schedulable; -} - -static void dcn32_full_validate_bw_helper(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *vlevel, - int *split, - bool *merge, - int *pipe_cnt) -{ - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - unsigned int dc_pipe_idx = 0; - bool found_supported_config = false; - struct pipe_ctx *pipe = NULL; - uint32_t non_subvp_pipes = 0; - bool drr_pipe_found = false; - uint32_t drr_pipe_index = 0; - uint32_t i = 0; - - /* - * DML favors voltage over p-state, but we're more interested in - * supporting p-state over voltage. We can't support p-state in - * prefetch mode > 0 so try capping the prefetch mode to start. - */ - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_uclk_fclk_and_stutter; - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - /* This may adjust vlevel and maxMpcComb */ - if (*vlevel < context->bw_ctx.dml.soc.num_states) - *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - - /* Conditions for setting up phantom pipes for SubVP: - * 1. Not force disable SubVP - * 2. Full update (i.e. !fast_validate) - * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) - * 4. Display configuration passes validation - * 5. 
(Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) - */ - if (!dc->debug.force_disable_subvp && dcn32_all_pipes_have_stream_and_plane(dc, context) && - (*vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || - dc->debug.force_subvp_mclk_switch)) { - - dcn32_merge_pipes_for_subvp(dc, context); - - while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) && - dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) { - - /* For the case where *vlevel = num_states, bandwidth validation has failed for this config. - * Adding phantom pipes won't change the validation result, so change the DML input param - * for P-State support before adding phantom pipes and recalculating the DML result. - * However, this case is only applicable for SubVP + DRR cases because the prefetch mode - * will not allow for switch in VBLANK. The DRR display must have it's VBLANK stretched - * enough to support support MCLK switching. - */ - if (*vlevel == context->bw_ctx.dml.soc.num_states) { - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_stutter; - /* There are params (such as FabricClock) that need to be recalculated - * after validation fails (otherwise it will be 0). Calculation for - * phantom vactive requires call into DML, so we must ensure all the - * vba params are valid otherwise we'll get incorrect phantom vactive. - */ - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - } - - dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx); - - *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - - if (*vlevel < context->bw_ctx.dml.soc.num_states && - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported - && subvp_validate_static_schedulability(dc, context, *vlevel)) { - found_supported_config = true; - } else if (*vlevel < context->bw_ctx.dml.soc.num_states && - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - /* Case where 1 SubVP is added, and DML reports MCLK unsupported. This handles - * the case for SubVP + DRR, where the DRR display does not support MCLK switch - * at it's native refresh rate / timing. - */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - pipe = &context->res_ctx.pipe_ctx[i]; - if (pipe->stream && pipe->plane_state && !pipe->top_pipe && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - non_subvp_pipes++; - // Use ignore_msa_timing_param flag to identify as DRR - if (pipe->stream->ignore_msa_timing_param) { - drr_pipe_found = true; - drr_pipe_index = i; - } - } - } - // If there is only 1 remaining non SubVP pipe that is DRR, check static - // schedulability for SubVP + DRR. 
- if (non_subvp_pipes == 1 && drr_pipe_found) { - found_supported_config = subvp_drr_schedulable(dc, - context, &context->res_ctx.pipe_ctx[drr_pipe_index]); - } - } - } - - // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) - // remove phantom pipes and repopulate dml pipes - if (!found_supported_config) { - dc->res_pool->funcs->remove_phantom_pipes(dc, context); - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported; - *pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false); - } else { - // only call dcn20_validate_apply_pipe_split_flags if we found a supported config - memset(split, 0, MAX_PIPES * sizeof(int)); - memset(merge, 0, MAX_PIPES * sizeof(bool)); - *vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - - // Most populate phantom DLG params before programming hardware / timing for phantom pipe - DC_FP_START(); - dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt); - DC_FP_END(); - - // Note: We can't apply the phantom pipes to hardware at this time. We have to wait - // until driver has acquired the DMCUB lock to do it safely. - } - } -} - static bool dcn32_internal_validate_bw( struct dc *dc, struct dc_state *context, @@ -2498,7 +1903,9 @@ static bool dcn32_internal_validate_bw( dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); if (!fast_validate) { + DC_FP_START(); dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); + DC_FP_END(); } if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 7ccad84b1f161..37d37067e9831 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -103,6 +103,8 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc, bool dcn32_subvp_in_use(struct dc *dc, struct dc_state *context); +bool dcn32_mpo_in_use(struct dc_state *context); + void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 796e3d966a766..47caa2c6d5b4e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -153,6 +153,17 @@ bool dcn32_subvp_in_use(struct dc *dc, return false; } +bool dcn32_mpo_in_use(struct dc_state *context) +{ + uint32_t i; + + for (i = 0; i < context->stream_count; i++) { + if (context->stream_status[i].plane_count > 1) + return true; + } + return false; +} + void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 74ccf453349ca..82d801933aecd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -24,6 +24,8 @@ * */ #include "dcn32_fpu.h" +#include "dcn32/dcn32_resource.h" +#include "dcn20/dcn20_resource.h" #include "display_mode_vba_util_32.h" // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" @@ -461,3 +463,598 @@ void 
dcn32_set_phantom_stream_timing(struct dc *dc, phantom_bp; } +/** + * dcn32_get_num_free_pipes: Calculate number of free pipes + * + * This function assumes that a "used" pipe is a pipe that has + * both a stream and a plane assigned to it. + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * Number of free pipes available in the context + */ +static unsigned int dcn32_get_num_free_pipes(struct dc *dc, struct dc_state *context) +{ + unsigned int i; + unsigned int free_pipes = 0; + unsigned int num_pipes = 0; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->stream && !pipe->top_pipe) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + } + } + + free_pipes = dc->res_pool->pipe_count - num_pipes; + return free_pipes; +} + +/** + * dcn32_assign_subvp_pipe: Function to decide which pipe will use Sub-VP. + * + * We enter this function if we are Sub-VP capable (i.e. enough pipes available) + * and regular P-State switching (i.e. VACTIVE/VBLANK) is not supported, or if + * we are forcing SubVP P-State switching on the current config. + * + * The number of pipes used for the chosen surface must be less than or equal to the + * number of free pipes available. + * + * In general we choose surfaces with the longest frame time first (better for SubVP + VBLANK). + * For multi-display cases the ActiveDRAMClockChangeMargin doesn't provide enough info on its own + * for determining which should be the SubVP pipe (need a way to determine if a pipe / plane doesn't + * support MCLK switching naturally [i.e. ACTIVE or VBLANK]). + * + * @param dc: current dc state + * @param context: new dc state + * @param index: [out] dc pipe index for the pipe chosen to have phantom pipes assigned + * + * Return: + * True if a valid pipe assignment was found for Sub-VP. Otherwise false. 
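+ *
+ * Note: pipes whose computed refresh rate is 120Hz or higher are not
+ * considered as SubVP candidates (see the refresh_rate check below).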
+ */ +static bool dcn32_assign_subvp_pipe(struct dc *dc, + struct dc_state *context, + unsigned int *index) +{ + unsigned int i, pipe_idx; + unsigned int max_frame_time = 0; + bool valid_assignment_found = false; + unsigned int free_pipes = dcn32_get_num_free_pipes(dc, context); + bool current_assignment_freesync = false; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + unsigned int num_pipes = 0; + unsigned int refresh_rate = 0; + + if (!pipe->stream) + continue; + + // Round up + refresh_rate = (pipe->stream->timing.pix_clk_100hz * 100 + + pipe->stream->timing.v_total * pipe->stream->timing.h_total - 1) + / (double)(pipe->stream->timing.v_total * pipe->stream->timing.h_total); + if (pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_NONE && refresh_rate < 120) { + while (pipe) { + num_pipes++; + pipe = pipe->bottom_pipe; + } + + pipe = &context->res_ctx.pipe_ctx[i]; + if (num_pipes <= free_pipes) { + struct dc_stream_state *stream = pipe->stream; + unsigned int frame_us = (stream->timing.v_total * stream->timing.h_total / + (double)(stream->timing.pix_clk_100hz * 100)) * 1000000; + if (frame_us > max_frame_time && !stream->ignore_msa_timing_param) { + *index = i; + max_frame_time = frame_us; + valid_assignment_found = true; + current_assignment_freesync = false; + /* For the 2-Freesync display case, still choose the one with the + * longest frame time + */ + } else if (stream->ignore_msa_timing_param && (!valid_assignment_found || + (current_assignment_freesync && frame_us > max_frame_time))) { + *index = i; + valid_assignment_found = true; + current_assignment_freesync = true; + } + } + } + pipe_idx++; + } + return valid_assignment_found; +} + +/** + * dcn32_enough_pipes_for_subvp: Function to check if there are "enough" pipes for SubVP. + * + * This function returns true if there are enough free pipes + * to create the required phantom pipes for any given stream + * (that does not already have phantom pipe assigned). + * + * e.g. For a 2 stream config where the first stream uses one + * pipe and the second stream uses 2 pipes (i.e. pipe split), + * this function will return true because there is 1 remaining + * pipe which can be used as the phantom pipe for the non pipe + * split pipe. + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * True if there are enough free pipes to assign phantom pipes to at least one + * stream that does not already have phantom pipes assigned. Otherwise false. + */ +static bool dcn32_enough_pipes_for_subvp(struct dc *dc, struct dc_state *context) +{ + unsigned int i, split_cnt, free_pipes; + unsigned int min_pipe_split = dc->res_pool->pipe_count + 1; // init as max number of pipes + 1 + bool subvp_possible = false; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + // Find the minimum pipe split count for non SubVP pipes + if (pipe->stream && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_NONE) { + split_cnt = 0; + while (pipe) { + split_cnt++; + pipe = pipe->bottom_pipe; + } + + if (split_cnt < min_pipe_split) + min_pipe_split = split_cnt; + } + } + + free_pipes = dcn32_get_num_free_pipes(dc, context); + + // SubVP only possible if at least one pipe is being used (i.e. 
free_pipes + // should not equal to the pipe_count) + if (free_pipes >= min_pipe_split && free_pipes < dc->res_pool->pipe_count) + subvp_possible = true; + + return subvp_possible; +} + +/** + * subvp_subvp_schedulable: Determine if SubVP + SubVP config is schedulable + * + * High level algorithm: + * 1. Find longest microschedule length (in us) between the two SubVP pipes + * 2. Check if the worst case overlap (VBLANK in middle of ACTIVE) for both + * pipes still allows for the maximum microschedule to fit in the active + * region for both pipes. + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * bool - True if the SubVP + SubVP config is schedulable, false otherwise + */ +static bool subvp_subvp_schedulable(struct dc *dc, struct dc_state *context) +{ + struct pipe_ctx *subvp_pipes[2]; + struct dc_stream_state *phantom = NULL; + uint32_t microschedule_lines = 0; + uint32_t index = 0; + uint32_t i; + uint32_t max_microschedule_us = 0; + int32_t vactive1_us, vactive2_us, vblank1_us, vblank2_us; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + uint32_t time_us = 0; + + /* Loop to calculate the maximum microschedule time between the two SubVP pipes, + * and also to store the two main SubVP pipe pointers in subvp_pipes[2]. + */ + if (pipe->stream && pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_MAIN) { + phantom = pipe->stream->mall_stream_config.paired_stream; + microschedule_lines = (phantom->timing.v_total - phantom->timing.v_front_porch) + + phantom->timing.v_addressable; + + // Round up when calculating microschedule time (+ 1 at the end) + time_us = (microschedule_lines * phantom->timing.h_total) / + (double)(phantom->timing.pix_clk_100hz * 100) * 1000000 + + dc->caps.subvp_prefetch_end_to_mall_start_us + + dc->caps.subvp_fw_processing_delay_us + 1; + if (time_us > max_microschedule_us) + max_microschedule_us = time_us; + + subvp_pipes[index] = pipe; + index++; + + // Maximum 2 SubVP pipes + if (index == 2) + break; + } + } + vactive1_us = ((subvp_pipes[0]->stream->timing.v_addressable * subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vactive2_us = ((subvp_pipes[1]->stream->timing.v_addressable * subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank1_us = (((subvp_pipes[0]->stream->timing.v_total - subvp_pipes[0]->stream->timing.v_addressable) * + subvp_pipes[0]->stream->timing.h_total) / + (double)(subvp_pipes[0]->stream->timing.pix_clk_100hz * 100)) * 1000000; + vblank2_us = (((subvp_pipes[1]->stream->timing.v_total - subvp_pipes[1]->stream->timing.v_addressable) * + subvp_pipes[1]->stream->timing.h_total) / + (double)(subvp_pipes[1]->stream->timing.pix_clk_100hz * 100)) * 1000000; + + if ((vactive1_us - vblank2_us) / 2 > max_microschedule_us && + (vactive2_us - vblank1_us) / 2 > max_microschedule_us) + return true; + + return false; +} + +/** + * subvp_drr_schedulable: Determine if SubVP + DRR config is schedulable + * + * High level algorithm: + * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe + * 2. 
Determine the frame time for the DRR display when adding required margin for MCLK switching
+ * (the margin is equal to the MALL region + DRR margin (500us))
+ * 3. If (SubVP Active - Prefetch > Stretched DRR frame + max(MALL region, Stretched DRR frame))
+ * then report the configuration as supported
+ *
+ * @dc: current dc state
+ * @context: new dc state
+ * @drr_pipe: DRR pipe_ctx for the SubVP + DRR config
+ *
+ * Return:
+ * bool - True if the SubVP + DRR config is schedulable, false otherwise
+ */
+static bool subvp_drr_schedulable(struct dc *dc, struct dc_state *context, struct pipe_ctx *drr_pipe)
+{
+	bool schedulable = false;
+	uint32_t i;
+	struct pipe_ctx *pipe = NULL;
+	struct dc_crtc_timing *main_timing = NULL;
+	struct dc_crtc_timing *phantom_timing = NULL;
+	struct dc_crtc_timing *drr_timing = NULL;
+	int16_t prefetch_us = 0;
+	int16_t mall_region_us = 0;
+	int16_t drr_frame_us = 0;	// nominal frame time
+	int16_t subvp_active_us = 0;
+	int16_t stretched_drr_us = 0;
+	int16_t drr_stretched_vblank_us = 0;
+	int16_t max_vblank_mallregion = 0;
+
+	// Find SubVP pipe
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		pipe = &context->res_ctx.pipe_ctx[i];
+
+		// We check for master pipe, but it shouldn't matter since we only need
+		// the pipe for timing info (stream should be same for any pipe splits)
+		if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe)
+			continue;
+
+		// Find the SubVP pipe
+		if (pipe->stream->mall_stream_config.type == SUBVP_MAIN)
+			break;
+	}
+
+	main_timing = &pipe->stream->timing;
+	phantom_timing = &pipe->stream->mall_stream_config.paired_stream->timing;
+	drr_timing = &drr_pipe->stream->timing;
+	prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total /
+			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000 +
+			dc->caps.subvp_prefetch_end_to_mall_start_us;
+	subvp_active_us = main_timing->v_addressable * main_timing->h_total /
+			(double)(main_timing->pix_clk_100hz * 100) * 1000000;
+	drr_frame_us = drr_timing->v_total * drr_timing->h_total /
+			(double)(drr_timing->pix_clk_100hz * 100) * 1000000;
+	// P-State allow width and FW delays already included phantom_timing->v_addressable
+	mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total /
+			(double)(phantom_timing->pix_clk_100hz * 100) * 1000000;
+	stretched_drr_us = drr_frame_us + mall_region_us + SUBVP_DRR_MARGIN_US;
+	drr_stretched_vblank_us = (drr_timing->v_total - drr_timing->v_addressable) * drr_timing->h_total /
+			(double)(drr_timing->pix_clk_100hz * 100) * 1000000 + (stretched_drr_us - drr_frame_us);
+	max_vblank_mallregion = drr_stretched_vblank_us > mall_region_us ? drr_stretched_vblank_us : mall_region_us;
+
+	/* We consider SubVP + DRR schedulable if the stretched frame duration of the DRR display (i.e. the
+	 * highest refresh rate + margin that can support UCLK P-State switch) passes the static analysis
+	 * for VBLANK: (VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time,
+	 * and the max of (VBLANK blanking time, MALL region)).
+	 */
+	if (stretched_drr_us < (1 / (double)drr_timing->min_refresh_in_uhz) * 1000000 * 1000000 &&
+	    subvp_active_us - prefetch_us - stretched_drr_us - max_vblank_mallregion > 0)
+		schedulable = true;

+	return schedulable;
+}
+
+/**
+ * subvp_vblank_schedulable: Determine if SubVP + VBLANK config is schedulable
+ *
+ * High level algorithm:
+ * 1. Get timing for SubVP pipe, phantom pipe, and VBLANK pipe
+ * 2.
If (SubVP Active - Prefetch > Vblank Frame Time + max(MALL region, Vblank blanking time)) + * then report the configuration as supported + * 3. If the VBLANK display is DRR, then take the DRR static schedulability path + * + * @dc: current dc state + * @context: new dc state + * + * Return: + * bool - True if the SubVP + VBLANK/DRR config is schedulable, false otherwise + */ +static bool subvp_vblank_schedulable(struct dc *dc, struct dc_state *context) +{ + struct pipe_ctx *pipe = NULL; + struct pipe_ctx *subvp_pipe = NULL; + bool found = false; + bool schedulable = false; + uint32_t i = 0; + uint8_t vblank_index = 0; + uint16_t prefetch_us = 0; + uint16_t mall_region_us = 0; + uint16_t vblank_frame_us = 0; + uint16_t subvp_active_us = 0; + uint16_t vblank_blank_us = 0; + uint16_t max_vblank_mallregion = 0; + struct dc_crtc_timing *main_timing = NULL; + struct dc_crtc_timing *phantom_timing = NULL; + struct dc_crtc_timing *vblank_timing = NULL; + + /* For SubVP + VBLANK/DRR cases, we assume there can only be + * a single VBLANK/DRR display. If DML outputs SubVP + VBLANK + * is supported, it is either a single VBLANK case or two VBLANK + * displays which are synchronized (in which case they have identical + * timings). + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe = &context->res_ctx.pipe_ctx[i]; + + // We check for master pipe, but it shouldn't matter since we only need + // the pipe for timing info (stream should be same for any pipe splits) + if (!pipe->stream || !pipe->plane_state || pipe->top_pipe || pipe->prev_odm_pipe) + continue; + + if (!found && pipe->stream->mall_stream_config.type == SUBVP_NONE) { + // Found pipe which is not SubVP or Phantom (i.e. the VBLANK pipe). + vblank_index = i; + found = true; + } + + if (!subvp_pipe && pipe->stream->mall_stream_config.type == SUBVP_MAIN) + subvp_pipe = pipe; + } + // Use ignore_msa_timing_param flag to identify as DRR + if (found && context->res_ctx.pipe_ctx[vblank_index].stream->ignore_msa_timing_param) { + // SUBVP + DRR case + schedulable = subvp_drr_schedulable(dc, context, &context->res_ctx.pipe_ctx[vblank_index]); + } else if (found) { + main_timing = &subvp_pipe->stream->timing; + phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + vblank_timing = &context->res_ctx.pipe_ctx[vblank_index].stream->timing; + // Prefetch time is equal to VACTIVE + BP + VSYNC of the phantom pipe + // Also include the prefetch end to mallstart delay time + prefetch_us = (phantom_timing->v_total - phantom_timing->v_front_porch) * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000 + + dc->caps.subvp_prefetch_end_to_mall_start_us; + // P-State allow width and FW delays already included phantom_timing->v_addressable + mall_region_us = phantom_timing->v_addressable * phantom_timing->h_total / + (double)(phantom_timing->pix_clk_100hz * 100) * 1000000; + vblank_frame_us = vblank_timing->v_total * vblank_timing->h_total / + (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; + vblank_blank_us = (vblank_timing->v_total - vblank_timing->v_addressable) * vblank_timing->h_total / + (double)(vblank_timing->pix_clk_100hz * 100) * 1000000; + subvp_active_us = main_timing->v_addressable * main_timing->h_total / + (double)(main_timing->pix_clk_100hz * 100) * 1000000; + max_vblank_mallregion = vblank_blank_us > mall_region_us ? 
vblank_blank_us : mall_region_us; + + // Schedulable if VACTIVE region of the SubVP pipe can fit the MALL prefetch, VBLANK frame time, + // and the max of (VBLANK blanking time, MALL region) + // TODO: Possibly add some margin (i.e. the below conditions should be [...] > X instead of [...] > 0) + if (subvp_active_us - prefetch_us - vblank_frame_us - max_vblank_mallregion > 0) + schedulable = true; + } + return schedulable; +} + +/** + * subvp_validate_static_schedulability: Check which SubVP case is calculated and handle + * static analysis based on the case. + * + * Three cases: + * 1. SubVP + SubVP + * 2. SubVP + VBLANK (DRR checked internally) + * 3. SubVP + VACTIVE (currently unsupported) + * + * @dc: current dc state + * @context: new dc state + * @vlevel: Voltage level calculated by DML + * + * Return: + * bool - True if statically schedulable, false otherwise + */ +static bool subvp_validate_static_schedulability(struct dc *dc, + struct dc_state *context, + int vlevel) +{ + bool schedulable = true; // true by default for single display case + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + uint32_t i, pipe_idx; + uint8_t subvp_count = 0; + uint8_t vactive_count = 0; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (pipe->plane_state && !pipe->top_pipe && + pipe->stream->mall_stream_config.type == SUBVP_MAIN) + subvp_count++; + + // Count how many planes that aren't SubVP/phantom are capable of VACTIVE + // switching (SubVP + VACTIVE unsupported). In situations where we force + // SubVP for a VACTIVE plane, we don't want to increment the vactive_count. + if (vba->ActiveDRAMClockChangeLatencyMargin[vba->pipe_plane[pipe_idx]] > 0 && + pipe->stream->mall_stream_config.type == SUBVP_NONE) { + vactive_count++; + } + pipe_idx++; + } + + if (subvp_count == 2) { + // Static schedulability check for SubVP + SubVP case + schedulable = subvp_subvp_schedulable(dc, context); + } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vblank_w_mall_sub_vp) { + // Static schedulability check for SubVP + VBLANK case. Also handle the case where + // DML outputs SubVP + VBLANK + VACTIVE (DML will report as SubVP + VBLANK) + if (vactive_count > 0) + schedulable = false; + else + schedulable = subvp_vblank_schedulable(dc, context); + } else if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_vactive_w_mall_sub_vp && + vactive_count > 0) { + // For single display SubVP cases, DML will output dm_dram_clock_change_vactive_w_mall_sub_vp by default. + // We tell the difference between SubVP vs. SubVP + VACTIVE by checking the vactive_count. + // SubVP + VACTIVE currently unsupported + schedulable = false; + } + return schedulable; +} + +void dcn32_full_validate_bw_helper(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *vlevel, + int *split, + bool *merge, + int *pipe_cnt) +{ + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + unsigned int dc_pipe_idx = 0; + bool found_supported_config = false; + struct pipe_ctx *pipe = NULL; + uint32_t non_subvp_pipes = 0; + bool drr_pipe_found = false; + uint32_t drr_pipe_index = 0; + uint32_t i = 0; + + dc_assert_fp_enabled(); + + /* + * DML favors voltage over p-state, but we're more interested in + * supporting p-state over voltage. We can't support p-state in + * prefetch mode > 0 so try capping the prefetch mode to start. 
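+	 * Concretely, "capping" here means starting validation with
+	 * allow_for_pstate_or_stutter_in_vblank_final set to
+	 * dm_prefetch_support_uclk_fclk_and_stutter; it is only relaxed to
+	 * dm_prefetch_support_stutter below, for the SubVP + DRR retry.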
+	 */
+	context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+		dm_prefetch_support_uclk_fclk_and_stutter;
+	*vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+	/* This may adjust vlevel and maxMpcComb */
+	if (*vlevel < context->bw_ctx.dml.soc.num_states)
+		*vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+
+	/* Conditions for setting up phantom pipes for SubVP:
+	 * 1. Not force disable SubVP
+	 * 2. Full update (i.e. !fast_validate)
+	 * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?)
+	 * 4. Display configuration passes validation
+	 * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch)
+	 */
+	if (!dc->debug.force_disable_subvp && dcn32_all_pipes_have_stream_and_plane(dc, context) &&
+	    !dcn32_mpo_in_use(context) && (*vlevel == context->bw_ctx.dml.soc.num_states ||
+	    vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported ||
+	    dc->debug.force_subvp_mclk_switch)) {

+		dcn32_merge_pipes_for_subvp(dc, context);
+
+		while (!found_supported_config && dcn32_enough_pipes_for_subvp(dc, context) &&
+			dcn32_assign_subvp_pipe(dc, context, &dc_pipe_idx)) {
+			/* For the case where *vlevel = num_states, bandwidth validation has failed for this config.
+			 * Adding phantom pipes won't change the validation result, so change the DML input param
+			 * for P-State support before adding phantom pipes and recalculating the DML result.
+			 * However, this case is only applicable for SubVP + DRR cases because the prefetch mode
+			 * will not allow for switch in VBLANK. The DRR display must have its VBLANK stretched
+			 * enough to support MCLK switching.
+			 */
+			if (*vlevel == context->bw_ctx.dml.soc.num_states) {
+				context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+					dm_prefetch_support_stutter;
+				/* There are params (such as FabricClock) that need to be recalculated
+				 * after validation fails (otherwise it will be 0). Calculation for
+				 * phantom vactive requires call into DML, so we must ensure all the
+				 * vba params are valid otherwise we'll get incorrect phantom vactive.
+				 */
+				*vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+			}
+
+			dc->res_pool->funcs->add_phantom_pipes(dc, context, pipes, *pipe_cnt, dc_pipe_idx);
+
+			*pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
+			// Populate dppclk to trigger a recalculate in dml_get_voltage_level
+			// so the phantom pipe DLG params can be assigned correctly.
+			pipes[0].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, *pipe_cnt, 0);
+			*vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt);
+
+			if (*vlevel < context->bw_ctx.dml.soc.num_states &&
+			    vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported
+			    && subvp_validate_static_schedulability(dc, context, *vlevel)) {
+				found_supported_config = true;
+			} else if (*vlevel < context->bw_ctx.dml.soc.num_states &&
+				   vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) {
+				/* Case where 1 SubVP is added, and DML reports MCLK unsupported. This handles
+				 * the case for SubVP + DRR, where the DRR display does not support MCLK switch
+				 * at its native refresh rate / timing.
+				 */
+				for (i = 0; i < dc->res_pool->pipe_count; i++) {
+					pipe = &context->res_ctx.pipe_ctx[i];
+					if (pipe->stream && pipe->plane_state && !pipe->top_pipe &&
+					    pipe->stream->mall_stream_config.type == SUBVP_NONE) {
+						non_subvp_pipes++;
+						// Use ignore_msa_timing_param flag to identify as DRR
+						if (pipe->stream->ignore_msa_timing_param) {
+							drr_pipe_found = true;
+							drr_pipe_index = i;
+						}
+					}
+				}
+				// If there is only 1 remaining non SubVP pipe that is DRR, check static
+				// schedulability for SubVP + DRR.
+				if (non_subvp_pipes == 1 && drr_pipe_found) {
+					found_supported_config = subvp_drr_schedulable(dc, context,
+										       &context->res_ctx.pipe_ctx[drr_pipe_index]);
+				}
+			}
+		}
+
+		// If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
+		// remove phantom pipes and repopulate dml pipes
+		if (!found_supported_config) {
+			dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+			vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
+			*pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
+		} else {
+			// only call dcn20_validate_apply_pipe_split_flags if we found a supported config
+			memset(split, 0, MAX_PIPES * sizeof(int));
+			memset(merge, 0, MAX_PIPES * sizeof(bool));
+			*vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge);
+
+			// Must populate phantom DLG params before programming hardware / timing for phantom pipe
+			DC_FP_START();
+			dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt);
+			DC_FP_END();
+
+			// Note: We can't apply the phantom pipes to hardware at this time. We have to wait
+			// until driver has acquired the DMCUB lock to do it safely.
+		}
+	}
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 4abef908dca90..29fb6b1bc17fc 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -57,4 +57,12 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
 				     unsigned int pipe_cnt,
 				     unsigned int dc_pipe_idx);
 
+void dcn32_full_validate_bw_helper(struct dc *dc,
+				   struct dc_state *context,
+				   display_e2e_pipe_params_st *pipes,
+				   int *vlevel,
+				   int *split,
+				   bool *merge,
+				   int *pipe_cnt);
+
 #endif

From 0339530d8879cbd560cd3d3de5138dc797744274 Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Fri, 8 Jul 2022 11:24:05 -0400
Subject: [PATCH 078/134] drm/amd/display: Move wm and dlg calculation to FPU code

Move dcn32_calculate_wm_and_dlg from dcn32 resources to the FPU code.
Additionally, this commit adds an interface to it.
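
As a minimal sketch of the resulting calling convention (using only the
names introduced by this patch), the resource code is left with a thin
wrapper that brackets the FPU math with DC_FP_START()/DC_FP_END():

	void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context,
					display_e2e_pipe_params_st *pipes,
					int pipe_cnt, int vlevel)
	{
		DC_FP_START();	/* enter kernel FPU context */
		dcn32_calculate_wm_and_dlg_fpu(dc, context, pipes, pipe_cnt, vlevel);
		DC_FP_END();	/* leave kernel FPU context */
	}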
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 196 +----------------- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 185 +++++++++++++++++ .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 5 + 3 files changed, 195 insertions(+), 191 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index fb48293fa04ec..3385b5c70c8c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -2280,187 +2280,6 @@ int dcn32_populate_dml_pipes_from_context( return pipe_cnt; } -void dcn32_calculate_wm_and_dlg_fp( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx, vlevel_temp = 0; - double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; - double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != - dm_dram_clock_change_unsupported; - - // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK - if (!pstate_en && dcn32_subvp_in_use(dc, context)) { - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; - pstate_en = true; - } - - /* Set B: - * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, - * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark - * calculations to cover bootup clocks. 
- * DCFCLK: soc.clock_limits[2] when available - * UCLK: soc.clock_limits[2] when available - */ - if (dcn3_2_soc.num_states > 2) { - vlevel_temp = 2; - dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Set D: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW when available - * UCLK : Min, as reported by PM FW when available - * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) - */ - - if (dcn3_2_soc.num_states > 2) { - vlevel_temp = 0; - dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Set C, for Dummy P-State: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW, when available - * UCLK : Min, as reported by PM FW, when available - * pstate latency as per UCLK state dummy pstate latency - */ - // For Set A and Set C use values from validation - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - unsigned int min_dram_speed_mts_margin = 160; - - if ((!pstate_en)) - min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; - - /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ - for (i = 3; i > 0; i--) - if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) - break; - - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.dummy_pstate_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { - /* The only difference between A and C is p-state latency, if p-state is not supported - * with full p-state latency we want to calculate DLG based on dummy p-state latency, - * Set A p-state watermark set to 0 on DCN32, when p-state unsupported, for now keep as DCN32. - */ - context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; - } else { - /* Set A: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW, when available - * UCLK: Min, as reported by PM FW, when available - */ - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - } - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); - pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - if (dc->config.forced_clocks) { - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; - } - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; - - pipe_idx++; - } - - context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; - - dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); - - if (!pstate_en) - /* Restore full p-state latency */ - context->bw_ctx.dml.soc.dram_clock_change_latency_us = - dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; -} - static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; @@ -2488,18 +2307,13 @@ static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); } -void dcn32_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) +void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) { DC_FP_START(); - dcn32_calculate_wm_and_dlg_fp( - dc, context, - pipes, - pipe_cnt, - vlevel); + dcn32_calculate_wm_and_dlg_fpu(dc, context, pipes, pipe_cnt, vlevel); DC_FP_END(); } diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 82d801933aecd..353d3a74e40ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1058,3 +1058,188 @@ void dcn32_full_validate_bw_helper(struct dc *dc, } } +void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx, vlevel_temp = 0; + double dcfclk = dcn3_2_soc.clock_limits[0].dcfclk_mhz; + double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != + dm_dram_clock_change_unsupported; + + dc_assert_fp_enabled(); + + // Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching its VBLANK + if (!pstate_en && dcn32_subvp_in_use(dc, context)) { + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp; + pstate_en = true; + } + + /* Set B: + * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, + * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark + * calculations to cover bootup clocks. + * DCFCLK: soc.clock_limits[2] when available + * UCLK: soc.clock_limits[2] when available + */ + if (dcn3_2_soc.num_states > 2) { + vlevel_temp = 2; + dcfclk = dcn3_2_soc.clock_limits[2].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, 
pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Set D: + * All clocks min. + * DCFCLK: Min, as reported by PM FW when available + * UCLK : Min, as reported by PM FW when available + * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) + */ + + if (dcn3_2_soc.num_states > 2) { + vlevel_temp = 0; + dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; + } else + dcfclk = 615; //DCFCLK Vmin_lv + + pipes[0].clks_cfg.voltage = vlevel_temp; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Set C, for Dummy P-State: + * All clocks min. 
+ * DCFCLK: Min, as reported by PM FW, when available + * UCLK : Min, as reported by PM FW, when available + * pstate latency as per UCLK state dummy pstate latency + */ + + // For Set A and Set C use values from validation + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { + unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + unsigned int min_dram_speed_mts_margin = 160; + + if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == + dm_dram_clock_change_unsupported) + min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; + + /* find the largest dummy p-state table entry whose dram_speed_mts is below min_dram_speed_mts (plus margin); anything below DPM0 still uses DPM0 */ + for (i = 3; i > 0; i--) + if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) + break; + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; + context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; + } + + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { + /* The only difference between A and C is p-state latency; if p-state is not supported + * with full p-state latency, we want to calculate DLG based on dummy p-state latency. + * Set A p-state watermark is set to 0 on DCN30 when p-state is unsupported; for now keep as DCN30. 
+ */ + context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; + } else { + /* Set A: + * All clocks min. + * DCFCLK: Min, as reported by PM FW, when available + * UCLK: Min, as reported by PM FW, when available + */ + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.usr_retraining_ns = get_usr_retraining_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + + dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); + + if (!pstate_en) + /* Restore full p-state latency */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 29fb6b1bc17fc..c7602f084be20 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -65,4 +65,9 @@ void dcn32_full_validate_bw_helper(struct dc *dc, bool *merge, int *pipe_cnt); +void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + #endif From 5b4ee98713c7b42e270835fea08c3b6977b48e7e 
Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 11:50:22 -0400 Subject: [PATCH 079/134] drm/amd/display: Move dlg params calculation Move dlg params calculation to the FPU folder and make it static. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 485 +---------------- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 6 - .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 506 +++++++++++++++++- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 13 +- 4 files changed, 513 insertions(+), 497 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 3385b5c70c8c5..0ca1d4c51baa1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1752,368 +1752,6 @@ void dcn32_add_phantom_pipes(struct dc *dc, struct dc_state *context, } } -static bool dcn32_split_stream_for_mpc_or_odm( - const struct dc *dc, - struct resource_context *res_ctx, - struct pipe_ctx *pri_pipe, - struct pipe_ctx *sec_pipe, - bool odm) -{ - int pipe_idx = sec_pipe->pipe_idx; - const struct resource_pool *pool = dc->res_pool; - - if (pri_pipe->plane_state) { - /* ODM + window MPO, where MPO window is on left half only */ - if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <= - pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) - return true; - - /* ODM + window MPO, where MPO window is on right half only */ - if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.width/2) - return true; - } - - *sec_pipe = *pri_pipe; - - sec_pipe->pipe_idx = pipe_idx; - sec_pipe->plane_res.mi = pool->mis[pipe_idx]; - sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; - sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; - sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; - sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; - sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; - sec_pipe->stream_res.dsc = NULL; - if (odm) { - if (pri_pipe->next_odm_pipe) { - ASSERT(pri_pipe->next_odm_pipe != sec_pipe); - sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; - sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; - } - if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { - pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; - } - if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { - pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; - } - pri_pipe->next_odm_pipe = sec_pipe; - sec_pipe->prev_odm_pipe = pri_pipe; - ASSERT(sec_pipe->top_pipe == NULL); - - if (!sec_pipe->top_pipe) - sec_pipe->stream_res.opp = pool->opps[pipe_idx]; - else - sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; - if (sec_pipe->stream->timing.flags.DSC == 1) { - dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); - ASSERT(sec_pipe->stream_res.dsc); - if (sec_pipe->stream_res.dsc == NULL) - return false; - } - } else { - if (pri_pipe->bottom_pipe) { - ASSERT(pri_pipe->bottom_pipe != sec_pipe); - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; - sec_pipe->bottom_pipe->top_pipe = sec_pipe; - } - pri_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe; - - ASSERT(pri_pipe->plane_state); - } - - return true; -} - -static struct pipe_ctx 
*dcn32_find_split_pipe( - struct dc *dc, - struct dc_state *context, - int old_index) -{ - struct pipe_ctx *pipe = NULL; - int i; - - if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[old_index]; - pipe->pipe_idx = old_index; - } - - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL - && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - } - - /* - * May need to fix pipes getting tossed from 1 opp to another on flip - * Add for debugging transient underflow during topology updates: - * ASSERT(pipe); - */ - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - - return pipe; -} - -static bool dcn32_internal_validate_bw( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *vlevel_out, - bool fast_validate) -{ - bool out = false; - bool repopulate_pipes = false; - int split[MAX_PIPES] = { 0 }; - bool merge[MAX_PIPES] = { false }; - bool newly_split[MAX_PIPES] = { false }; - int pipe_cnt, i, pipe_idx, vlevel; - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - ASSERT(pipes); - if (!pipes) - return false; - - // For each full update, remove all existing phantom pipes first - dc->res_pool->funcs->remove_phantom_pipes(dc, context); - - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - - if (!pipe_cnt) { - out = true; - goto validate_out; - } - - dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (!fast_validate) { - DC_FP_START(); - dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); - DC_FP_END(); - } - - if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - /* - * If mode is unsupported or there's still no p-state support then - * fall back to favoring voltage. - * - * We don't actually support prefetch mode 2, so require that we - * at least support prefetch mode 1. 
- */ - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_stutter; - - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - if (vlevel < context->bw_ctx.dml.soc.num_states) { - memset(split, 0, MAX_PIPES * sizeof(int)); - memset(merge, 0, MAX_PIPES * sizeof(bool)); - vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); - } - } - - dml_log_mode_support_params(&context->bw_ctx.dml); - - if (vlevel == context->bw_ctx.dml.soc.num_states) - goto validate_fail; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; - - if (!pipe->stream) - continue; - - /* We only support full screen mpo with ODM */ - if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled - && pipe->plane_state && mpo_pipe - && memcmp(&mpo_pipe->plane_res.scl_data.recout, - &pipe->plane_res.scl_data.recout, - sizeof(struct rect)) != 0) { - ASSERT(mpo_pipe->plane_state != pipe->plane_state); - goto validate_fail; - } - pipe_idx++; - } - - /* merge pipes if necessary */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - /*skip pipes that don't need merging*/ - if (!merge[i]) - continue; - - /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ - if (pipe->prev_odm_pipe) { - /*split off odm pipe*/ - pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; - if (pipe->next_odm_pipe) - pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; - - pipe->bottom_pipe = NULL; - pipe->next_odm_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - pipe->top_pipe = NULL; - pipe->prev_odm_pipe = NULL; - if (pipe->stream_res.dsc) - dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - struct pipe_ctx *top_pipe = pipe->top_pipe; - struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; - - top_pipe->bottom_pipe = bottom_pipe; - if (bottom_pipe) - bottom_pipe->top_pipe = top_pipe; - - pipe->top_pipe = NULL; - pipe->bottom_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else - ASSERT(0); /* Should never try to merge master pipe */ - - } - - for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - struct pipe_ctx *hsplit_pipe = NULL; - bool odm; - int old_index = -1; - - if (!pipe->stream || newly_split[i]) - continue; - - pipe_idx++; - odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; - - if (!pipe->plane_state && !odm) - continue; - - if (split[i]) { - if (odm) { - if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - } else { - if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->plane_state == 
old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else if (old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - } - hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index); - ASSERT(hsplit_pipe); - if (!hsplit_pipe) - goto validate_fail; - - if (!dcn32_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, hsplit_pipe, odm)) - goto validate_fail; - - newly_split[hsplit_pipe->pipe_idx] = true; - repopulate_pipes = true; - } - if (split[i] == 4) { - struct pipe_ctx *pipe_4to1; - - if (odm && old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dcn32_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - - if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe - && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dcn32_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - hsplit_pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - } - if (odm) - dcn20_build_mapped_resource(dc, context, pipe->stream); - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state) { - if (!resource_build_scaling_params(pipe)) - goto validate_fail; - } - } - - /* Actual dsc count per stream dsc validation*/ - if (!dcn20_validate_dsc(dc, context)) { - vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; - goto validate_fail; - } - - if (repopulate_pipes) - pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - *vlevel_out = vlevel; - *pipe_cnt_out = pipe_cnt; - - out = true; - goto validate_out; - -validate_fail: - out = false; - -validate_out: - return out; -} - bool dcn32_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) @@ -2129,9 +1767,9 @@ bool dcn32_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); - DC_FP_START(); + DC_FP_START(); out = dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); - DC_FP_END(); + DC_FP_END(); if (pipe_cnt == 0) goto validate_out; @@ -2317,125 +1955,6 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, DC_FP_END(); } -static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) -{ - int i; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) - return true; - } - return false; -} - -void 
dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, - int pipe_cnt, int vlevel) -{ - int i, pipe_idx; - bool usr_retraining_support = false; - bool unbounded_req_enabled = false; - - /* Writeback MCIF_WB arbitration parameters */ - dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); - - context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; - context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; - context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; - context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; - context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; - context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000; - context->bw_ctx.bw.dcn.clk.p_state_change_support = - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] - != dm_dram_clock_change_unsupported; - context->bw_ctx.bw.dcn.clk.num_ways = dcn32_helper_calculate_num_ways_for_subvp(dc, context); - /* - * - * TODO: needs FAMS - * Pstate change might not be supported by hardware, but it might be - * possible with firmware driven vertical blank stretching. - */ - // context->bw_ctx.bw.dcn.clk.p_state_change_support |= context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching; - context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); - context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000; - if (context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_fclock_change_unsupported) - context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; - else - context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true; - - usr_retraining_support = context->bw_ctx.dml.vba.USRRetrainingSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - ASSERT(usr_retraining_support); - - if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) - context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; - - unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (unbounded_req_enabled && pipe_cnt > 1) { - // Unbounded requesting should not ever be used when more than 1 pipe is enabled. 
- ASSERT(false); - unbounded_req_enabled = false; - } - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { - // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; - context->res_ctx.pipe_ctx[i].unbounded_req = false; - } else { - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, - pipe_idx); - context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled; - } - if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; - pipe_idx++; - } - /*save a original dppclock copy*/ - context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; - context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; - context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz - * 1000; - context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz - * 1000; - - context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].stream) - context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[i].det_buffer_size_kb; - } - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg_v2(&context->bw_ctx.dml, - &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs, pipes, - pipe_cnt, pipe_idx); - - context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg_v2(&context->res_ctx.pipe_ctx[i].rq_regs, - &context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - pipe_idx++; - } -} - static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) { if (entry->dcfclk_mhz > 0) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index 37d37067e9831..fc0fe48023a07 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -44,12 +44,6 @@ struct resource_pool *dcn32_create_resource_pool( const struct dc_init_data *init_data, struct dc *dc); -void dcn32_calculate_dlg_params( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel); - struct panel_cntl *dcn32_panel_cntl_create( const struct panel_cntl_init_data *init_data); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 353d3a74e40ba..66102db872655 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -24,12 +24,15 @@ * */ #include "dcn32_fpu.h" +#include "dc_link_dp.h" #include "dcn32/dcn32_resource.h" #include "dcn20/dcn20_resource.h" #include "display_mode_vba_util_32.h" // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" +#define DC_LOGGER_INIT(logger) + struct _vcs_dpi_ip_params_st dcn3_2_ip = { .gpuvm_enable = 0, .gpuvm_max_page_table_levels = 4, @@ -931,7 +934,7 @@ static bool subvp_validate_static_schedulability(struct dc *dc, return schedulable; } -void dcn32_full_validate_bw_helper(struct dc *dc, +static void dcn32_full_validate_bw_helper(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int *vlevel, @@ -1058,6 +1061,507 @@ void dcn32_full_validate_bw_helper(struct dc *dc, } } +static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) +{ + int i; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) + return true; + } + return false; +} + +static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, int vlevel) +{ + int i, pipe_idx; + bool usr_retraining_support = false; + bool unbounded_req_enabled = false; + + dc_assert_fp_enabled(); + + /* Writeback MCIF_WB arbitration parameters */ + dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); + + context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; + context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; + context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; + context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; + context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; + context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 1000; + context->bw_ctx.bw.dcn.clk.p_state_change_support = + context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] + != dm_dram_clock_change_unsupported; + context->bw_ctx.bw.dcn.clk.num_ways = dcn32_helper_calculate_num_ways_for_subvp(dc, context); + + context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; + context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); + context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = context->bw_ctx.dml.vba.DTBCLKPerState[vlevel] * 1000; + if (context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_fclock_change_unsupported) + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = false; + else + context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = true; + + usr_retraining_support = context->bw_ctx.dml.vba.USRRetrainingSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + ASSERT(usr_retraining_support); + + if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) + context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; + + unbounded_req_enabled = get_unbounded_request_enabled(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (unbounded_req_enabled && pipe_cnt > 1) { + // Unbounded requesting should not ever be used when more than 1 pipe is enabled. 
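+ // Fall back to bounded requests here; the per-pipe DET allocation computed in the loop below then applies to each active pipe.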
+ ASSERT(false); + unbounded_req_enabled = false; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + + if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { + // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; + context->res_ctx.pipe_ctx[i].unbounded_req = false; + } else { + context->res_ctx.pipe_ctx[i].det_buffer_size_kb = get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, + pipe_idx); + context->res_ctx.pipe_ctx[i].unbounded_req = unbounded_req_enabled; + } + + if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; + context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; + pipe_idx++; + } + /*save a original dppclock copy*/ + context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; + context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; + context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz + * 1000; + context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz + * 1000; + + context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].stream) + context->bw_ctx.bw.dcn.compbuf_size_kb -= context->res_ctx.pipe_ctx[i].det_buffer_size_kb; + } + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg_v2(&context->bw_ctx.dml, + &context->res_ctx.pipe_ctx[i].dlg_regs, &context->res_ctx.pipe_ctx[i].ttu_regs, pipes, + pipe_cnt, pipe_idx); + + context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg_v2(&context->res_ctx.pipe_ctx[i].rq_regs, + &context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + pipe_idx++; + } +} + +static struct pipe_ctx *dcn32_find_split_pipe( + struct dc *dc, + struct dc_state *context, + int old_index) +{ + struct pipe_ctx *pipe = NULL; + int i; + + if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[old_index]; + pipe->pipe_idx = old_index; + } + + if (!pipe) + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL + && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + } + + /* + * May need to fix pipes getting tossed from 1 opp to another on flip + * Add for debugging transient underflow during topology updates: + * ASSERT(pipe); + */ + if (!pipe) + 
for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { + if (context->res_ctx.pipe_ctx[i].stream == NULL) { + pipe = &context->res_ctx.pipe_ctx[i]; + pipe->pipe_idx = i; + break; + } + } + + return pipe; +} + +static bool dcn32_split_stream_for_mpc_or_odm( + const struct dc *dc, + struct resource_context *res_ctx, + struct pipe_ctx *pri_pipe, + struct pipe_ctx *sec_pipe, + bool odm) +{ + int pipe_idx = sec_pipe->pipe_idx; + const struct resource_pool *pool = dc->res_pool; + + DC_LOGGER_INIT(dc->ctx->logger); + + if (odm && pri_pipe->plane_state) { + /* ODM + window MPO, where MPO window is on left half only */ + if (pri_pipe->plane_state->clip_rect.x + pri_pipe->plane_state->clip_rect.width <= + pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) { + + DC_LOG_SCALER("%s - ODM + window MPO(left). pri_pipe:%d\n", + __func__, + pri_pipe->pipe_idx); + return true; + } + + /* ODM + window MPO, where MPO window is on right half only */ + if (pri_pipe->plane_state->clip_rect.x >= pri_pipe->stream->src.x + pri_pipe->stream->src.width/2) { + + DC_LOG_SCALER("%s - ODM + window MPO(right). pri_pipe:%d\n", + __func__, + pri_pipe->pipe_idx); + return true; + } + } + + *sec_pipe = *pri_pipe; + + sec_pipe->pipe_idx = pipe_idx; + sec_pipe->plane_res.mi = pool->mis[pipe_idx]; + sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; + sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; + sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; + sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; + sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; + sec_pipe->stream_res.dsc = NULL; + if (odm) { + if (pri_pipe->next_odm_pipe) { + ASSERT(pri_pipe->next_odm_pipe != sec_pipe); + sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; + sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; + } + if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { + pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; + } + if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { + pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; + } + pri_pipe->next_odm_pipe = sec_pipe; + sec_pipe->prev_odm_pipe = pri_pipe; + ASSERT(sec_pipe->top_pipe == NULL); + + if (!sec_pipe->top_pipe) + sec_pipe->stream_res.opp = pool->opps[pipe_idx]; + else + sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; + if (sec_pipe->stream->timing.flags.DSC == 1) { + dcn20_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); + ASSERT(sec_pipe->stream_res.dsc); + if (sec_pipe->stream_res.dsc == NULL) + return false; + } + } else { + if (pri_pipe->bottom_pipe) { + ASSERT(pri_pipe->bottom_pipe != sec_pipe); + sec_pipe->bottom_pipe = pri_pipe->bottom_pipe; + sec_pipe->bottom_pipe->top_pipe = sec_pipe; + } + pri_pipe->bottom_pipe = sec_pipe; + sec_pipe->top_pipe = pri_pipe; + + ASSERT(pri_pipe->plane_state); + } + + return true; +} + +bool dcn32_internal_validate_bw(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate) +{ + bool out = false; + bool repopulate_pipes = false; + int split[MAX_PIPES] = { 0 }; + bool merge[MAX_PIPES] = { false }; + bool newly_split[MAX_PIPES] = { false }; + int pipe_cnt, i, pipe_idx, vlevel; + struct vba_vars_st *vba = &context->bw_ctx.dml.vba; + + dc_assert_fp_enabled(); + + ASSERT(pipes); + if (!pipes) + return false; + + // For each full update, remove all existing phantom pipes 
first + dc->res_pool->funcs->remove_phantom_pipes(dc, context); + + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + + if (!pipe_cnt) { + out = true; + goto validate_out; + } + + dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); + + if (!fast_validate) { + DC_FP_START(); + dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); + DC_FP_END(); + } + + if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + /* + * If mode is unsupported or there's still no p-state support then + * fall back to favoring voltage. + * + * If Prefetch mode 0 failed for this config, or passed with Max UCLK, try if + * supported with Prefetch mode 1 (dm_prefetch_support_fclk_and_stutter == 2) + */ + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_fclk_and_stutter; + + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + + /* Last attempt with Prefetch mode 2 (dm_prefetch_support_stutter == 3) */ + if (vlevel == context->bw_ctx.dml.soc.num_states) { + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_stutter; + vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); + } + + if (vlevel < context->bw_ctx.dml.soc.num_states) { + memset(split, 0, sizeof(split)); + memset(merge, 0, sizeof(merge)); + vlevel = dcn20_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); + } + } + + dml_log_mode_support_params(&context->bw_ctx.dml); + + if (vlevel == context->bw_ctx.dml.soc.num_states) + goto validate_fail; + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; + + if (!pipe->stream) + continue; + + if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled + && !dc->config.enable_windowed_mpo_odm + && pipe->plane_state && mpo_pipe + && memcmp(&mpo_pipe->plane_res.scl_data.recout, + &pipe->plane_res.scl_data.recout, + sizeof(struct rect)) != 0) { + ASSERT(mpo_pipe->plane_state != pipe->plane_state); + goto validate_fail; + } + pipe_idx++; + } + + /* merge pipes if necessary */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + /*skip pipes that don't need merging*/ + if (!merge[i]) + continue; + + /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ + if (pipe->prev_odm_pipe) { + /*split off odm pipe*/ + pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; + if (pipe->next_odm_pipe) + pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; + + pipe->bottom_pipe = NULL; + pipe->next_odm_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + pipe->top_pipe = NULL; + pipe->prev_odm_pipe = NULL; + if (pipe->stream_res.dsc) + dcn20_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { + struct pipe_ctx *top_pipe = pipe->top_pipe; + struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; + + top_pipe->bottom_pipe = bottom_pipe; + if (bottom_pipe) + bottom_pipe->top_pipe = top_pipe; + + 
pipe->top_pipe = NULL; + pipe->bottom_pipe = NULL; + pipe->plane_state = NULL; + pipe->stream = NULL; + memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); + memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); + repopulate_pipes = true; + } else + ASSERT(0); /* Should never try to merge master pipe */ + + } + + for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; + struct pipe_ctx *hsplit_pipe = NULL; + bool odm; + int old_index = -1; + + if (!pipe->stream || newly_split[i]) + continue; + + pipe_idx++; + odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; + + if (!pipe->plane_state && !odm) + continue; + + if (split[i]) { + if (odm) { + if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + } else { + if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else if (old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + } + hsplit_pipe = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(hsplit_pipe); + if (!hsplit_pipe) + goto validate_fail; + + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, hsplit_pipe, odm)) + goto validate_fail; + + newly_split[hsplit_pipe->pipe_idx] = true; + repopulate_pipes = true; + } + if (split[i] == 4) { + struct pipe_ctx *pipe_4to1; + + if (odm && old_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && + old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + + if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe + && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) + old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; + else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && + old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) + old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; + else + old_index = -1; + pipe_4to1 = dcn32_find_split_pipe(dc, context, old_index); + ASSERT(pipe_4to1); + if (!pipe_4to1) + goto validate_fail; + if (!dcn32_split_stream_for_mpc_or_odm( + dc, &context->res_ctx, + hsplit_pipe, pipe_4to1, odm)) + goto validate_fail; + newly_split[pipe_4to1->pipe_idx] = true; + } + if (odm) + dcn20_build_mapped_resource(dc, context, pipe->stream); + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (pipe->plane_state) { + if (!resource_build_scaling_params(pipe)) + goto validate_fail; + } + } + + /* Actual dsc count per stream dsc validation*/ + if 
(!dcn20_validate_dsc(dc, context)) { + vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; + goto validate_fail; + } + + if (repopulate_pipes) + pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); + *vlevel_out = vlevel; + *pipe_cnt_out = pipe_cnt; + + out = true; + goto validate_out; + +validate_fail: + out = false; + +validate_out: + return out; +} + + void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index c7602f084be20..56973debc3483 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -57,13 +57,12 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, unsigned int pipe_cnt, unsigned int dc_pipe_idx); -void dcn32_full_validate_bw_helper(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *vlevel, - int *split, - bool *merge, - int *pipe_cnt); +bool dcn32_internal_validate_bw(struct dc *dc, + struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int *pipe_cnt_out, + int *vlevel_out, + bool fast_validate); void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, From a4f8f294fe99a678ce0a50d649732440c41742b7 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 11:55:39 -0400 Subject: [PATCH 080/134] drm/amd/display: Move ntuple to insert entry Move get_optimal_ntuple to the FPU code and call it inside insert_entry_into_table_sorted. Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 28 ------------------- .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 25 +++++++++++++++++ 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 0ca1d4c51baa1..db40080169116 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1955,29 +1955,6 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, DC_FP_END(); } -static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) -{ - if (entry->dcfclk_mhz > 0) { - float bw_on_sdp = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); - - entry->fabricclk_mhz = bw_on_sdp / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->fabricclk_mhz > 0) { - float bw_on_fabric = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); - - entry->dcfclk_mhz = bw_on_fabric / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->dram_speed_mts > 0) { - float bw_on_dram = entry->dram_speed_mts * 
dcn3_2_soc.num_chans * - dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - entry->fabricclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dcfclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); - } -} - static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, unsigned int index) { @@ -2061,7 +2038,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); DC_FP_END(); @@ -2072,7 +2048,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); DC_FP_END(); @@ -2083,7 +2058,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - get_optimal_ntuple(&entry); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); DC_FP_END(); @@ -2096,7 +2070,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); DC_FP_END(); @@ -2108,7 +2081,6 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = max_fclk_mhz; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); DC_FP_START(); insert_entry_into_table_sorted(table, num_entries, &entry); DC_FP_END(); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 66102db872655..7c60a954737b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -353,6 +353,29 @@ static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st * return limiting_bw_kbytes_sec; } +static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) +{ + if (entry->dcfclk_mhz > 0) { + float bw_on_sdp = entry->dcfclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100); + + entry->fabricclk_mhz = bw_on_sdp / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_sdp / (dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->fabricclk_mhz > 0) { + float bw_on_fabric = entry->fabricclk_mhz * dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100); + + entry->dcfclk_mhz = bw_on_fabric / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_fabric / (dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->dram_speed_mts > 0) { + float bw_on_dram = entry->dram_speed_mts * dcn3_2_soc.num_chans * + 
dcn3_2_soc.dram_channel_width_bytes * ((float)dcn3_2_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100);
+
+		entry->fabricclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_fabric_bw_after_urgent / 100));
+		entry->dcfclk_mhz = bw_on_dram / (dcn3_2_soc.return_bus_width_bytes * ((float)dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / 100));
+	}
+}
+
 void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
 				    unsigned int *num_entries,
 				    struct _vcs_dpi_voltage_scaling_st *entry)
@@ -363,6 +386,8 @@ void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
 
 	dc_assert_fp_enabled();
 
+	get_optimal_ntuple(entry);
+
 	if (*num_entries == 0) {
 		table[0] = *entry;
 		(*num_entries)++;

From 4e14e0fc9f0e47d9d3d39b6f92724f2038ee6e25 Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Fri, 8 Jul 2022 12:06:27 -0400
Subject: [PATCH 081/134] drm/amd/display: Move bounding box to FPU folder

The final part of the DCN32 code that uses FPU is the bounding box
code, and this commit moves it to dcn32_fpu.

Tested-by: Daniel Wheeler
Reviewed-by: Harry Wentland
Signed-off-by: Rodrigo Siqueira
Signed-off-by: Alex Deucher
---
 .../drm/amd/display/dc/dcn32/dcn32_resource.c | 460 +----------------
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 470 ++++++++++++++++++
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |   2 +
 3 files changed, 474 insertions(+), 458 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index db40080169116..e551d2936d03d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1922,29 +1922,6 @@ static struct dc_cap_funcs cap_funcs = {
 	.get_dcc_compression_cap = dcn20_get_dcc_compression_cap
 };
 
-
-static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
-		unsigned int *optimal_dcfclk,
-		unsigned int *optimal_fclk)
-{
-	double bw_from_dram, bw_from_dram1, bw_from_dram2;
-
-	bw_from_dram1 = uclk_mts * dcn3_2_soc.num_chans *
-		dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100);
-	bw_from_dram2 = uclk_mts * dcn3_2_soc.num_chans *
-		dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100);
-
-	bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? 
bw_from_dram1 : bw_from_dram2; - - if (optimal_fclk) - *optimal_fclk = bw_from_dram / - (dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); - - if (optimal_dcfclk) - *optimal_dcfclk = bw_from_dram / - (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); -} - void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, @@ -1955,444 +1932,11 @@ void dcn32_calculate_wm_and_dlg(struct dc *dc, struct dc_state *context, DC_FP_END(); } -static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - unsigned int index) -{ - int i; - - if (*num_entries == 0) - return; - - for (i = index; i < *num_entries - 1; i++) { - table[i] = table[i + 1]; - } - memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); -} - -static int build_synthetic_soc_states(struct clk_bw_params *bw_params, - struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - int i, j; - struct _vcs_dpi_voltage_scaling_st entry = {0}; - - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, - max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; - - static const unsigned int num_dcfclk_stas = 5; - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; - - unsigned int num_uclk_dpms = 0; - unsigned int num_fclk_dpms = 0; - unsigned int num_dcfclk_dpms = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) - max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) - max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) - max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - - if (bw_params->clk_table.entries[i].memclk_mhz > 0) - num_uclk_dpms++; - if (bw_params->clk_table.entries[i].fclk_mhz > 0) - num_fclk_dpms++; - if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) - num_dcfclk_dpms++; - } - - if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) - return -1; - - if (max_dppclk_mhz == 0) - max_dppclk_mhz = max_dispclk_mhz; - - if (max_fclk_mhz == 0) - max_fclk_mhz = max_dcfclk_mhz * dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / dcn3_2_soc.pct_ideal_fabric_bw_after_urgent; - - if (max_phyclk_mhz == 0) - max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; - - *num_entries = 0; - entry.dispclk_mhz = max_dispclk_mhz; - entry.dscclk_mhz = max_dispclk_mhz / 3; - entry.dppclk_mhz = max_dppclk_mhz; - entry.dtbclk_mhz = max_dtbclk_mhz; - entry.phyclk_mhz = max_phyclk_mhz; - entry.phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; - entry.phyclk_d32_mhz 
= dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; - - // Insert all the DCFCLK STAs - for (i = 0; i < num_dcfclk_stas; i++) { - entry.dcfclk_mhz = dcfclk_sta_targets[i]; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - - DC_FP_START(); - insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // Insert the max DCFCLK - entry.dcfclk_mhz = max_dcfclk_mhz; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - DC_FP_START(); - insert_entry_into_table_sorted(table, num_entries, &entry); + dcn32_update_bw_bounding_box_fpu(dc, bw_params); DC_FP_END(); - - // Insert the UCLK DPMS - for (i = 0; i < num_uclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - - DC_FP_START(); - insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // If FCLK is coarse grained, insert individual DPMs. - if (num_fclk_dpms > 2) { - for (i = 0; i < num_fclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - entry.dram_speed_mts = 0; - - DC_FP_START(); - insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - } - // If FCLK fine grained, only insert max - else { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = max_fclk_mhz; - entry.dram_speed_mts = 0; - - DC_FP_START(); - insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // At this point, the table contains all "points of interest" based on - // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock - // ratios (by derate, are exact). - - // Remove states that require higher clocks than are supported - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz > max_dcfclk_mhz || - table[i].fabricclk_mhz > max_fclk_mhz || - table[i].dram_speed_mts > max_uclk_mhz * 16) - remove_entry_from_table_at_index(table, num_entries, i); - } - - // At this point, the table only contains supported points of interest - // it could be used as is, but some states may be redundant due to - // coarse grained nature of some clocks, so we want to round up to - // coarse grained DPMs and remove duplicates. - - // Round up UCLKs - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_uclk_dpms; j++) { - if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { - table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - // If FCLK is coarse grained, round up to next DPMs - if (num_fclk_dpms > 2) { - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_fclk_dpms; j++) { - if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { - table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; - break; - } - } - } - } - // Otherwise, round up to minimum. - else { - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].fabricclk_mhz < min_fclk_mhz) { - table[i].fabricclk_mhz = min_fclk_mhz; - break; - } - } - } - - // Round DCFCLKs up to minimum - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz < min_dcfclk_mhz) { - table[i].dcfclk_mhz = min_dcfclk_mhz; - break; - } - } - - // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. 
- i = 0; - while (i < *num_entries - 1) { - if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && - table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && - table[i].dram_speed_mts == table[i + 1].dram_speed_mts) - remove_entry_from_table_at_index(table, num_entries, i + 1); - else - i++; - } - - // Fix up the state indicies - for (i = *num_entries - 1; i >= 0 ; i--) { - table[i].state = i; - } - - return 0; -} - -/* dcn32_update_bw_bounding_box - * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from spreadsheet - * with actual values as per dGPU SKU: - * -with passed few options from dc->config - * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW) - * -with passed latency values (passed in ns units) in dc-> bb override for debugging purposes - * -with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU - * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC) - * -clocks levels with passed clk_table entries from Clk Mgr as reported by PM FW for different - * clocks (which might differ for certain dGPU SKU of the same ASIC) - */ -static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { - - /* Overrides from dc->config options */ - dcn3_2_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - - /* Override from passed dc->bb_overrides if available*/ - if ((int)(dcn3_2_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns - && dc->bb_overrides.sr_exit_time_ns) { - dcn3_2_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; - } - - if ((int)(dcn3_2_soc.sr_enter_plus_exit_time_us * 1000) - != dc->bb_overrides.sr_enter_plus_exit_time_ns - && dc->bb_overrides.sr_enter_plus_exit_time_ns) { - dcn3_2_soc.sr_enter_plus_exit_time_us = - dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; - } - - if ((int)(dcn3_2_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns - && dc->bb_overrides.urgent_latency_ns) { - dcn3_2_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; - } - - if ((int)(dcn3_2_soc.dram_clock_change_latency_us * 1000) - != dc->bb_overrides.dram_clock_change_latency_ns - && dc->bb_overrides.dram_clock_change_latency_ns) { - dcn3_2_soc.dram_clock_change_latency_us = - dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; - } - - if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000) - != dc->bb_overrides.dummy_clock_change_latency_ns - && dc->bb_overrides.dummy_clock_change_latency_ns) { - dcn3_2_soc.dummy_pstate_latency_us = - dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; - } - - /* Override from VBIOS if VBIOS bb_info available */ - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = {0}; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - if (bb_info.dram_clock_change_latency_100ns > 0) - dcn3_2_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; - - if (bb_info.dram_sr_enter_exit_latency_100ns > 0) - dcn3_2_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; - - if (bb_info.dram_sr_exit_latency_100ns > 0) - dcn3_2_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; - } - } - - /* Override from VBIOS for num_chan */ - if (dc->ctx->dc_bios->vram_info.num_chans) - dcn3_2_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; - - if 
(dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) - dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - - } - - /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ - dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - - /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */ - if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { - if (dc->debug.use_legacy_soc_bb_mechanism) { - unsigned int i = 0, j = 0, num_states = 0; - - unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; - unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; - unsigned int min_dcfclk = UINT_MAX; - /* Set 199 as first value in STA target array to have a minimum DCFCLK value. - * For DCN32 we set min to 199 so minimum FCLK DPM0 (300Mhz can be achieved) */ - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; - unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].dcfclk_mhz != 0 && - bw_params->clk_table.entries[i].dcfclk_mhz < min_dcfclk) - min_dcfclk = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - } - if (min_dcfclk > dcfclk_sta_targets[0]) - dcfclk_sta_targets[0] = min_dcfclk; - if (!max_dcfclk_mhz) - max_dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz; - if (!max_dispclk_mhz) - max_dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz; - if (!max_dppclk_mhz) - max_dppclk_mhz = dcn3_2_soc.clock_limits[0].dppclk_mhz; - if (!max_phyclk_mhz) - max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; - - if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array - dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; - num_dcfclk_sta_targets++; - } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates - for (i = 0; i < num_dcfclk_sta_targets; i++) { - if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { - dcfclk_sta_targets[i] = max_dcfclk_mhz; - break; - } - } - // Update size of array since we "removed" duplicates - num_dcfclk_sta_targets = i + 1; - } - - num_uclk_states = bw_params->clk_table.num_entries; - - // Calculate optimal dcfclk for each uclk - for (i = 0; i < num_uclk_states; i++) { - dcn32_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, - &optimal_dcfclk_for_uclk[i], NULL); - if (optimal_dcfclk_for_uclk[i] < 
bw_params->clk_table.entries[0].dcfclk_mhz) { - optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; - } - } - - // Calculate optimal uclk for each dcfclk sta target - for (i = 0; i < num_dcfclk_sta_targets; i++) { - for (j = 0; j < num_uclk_states; j++) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { - optimal_uclk_for_dcfclk_sta_targets[i] = - bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - i = 0; - j = 0; - // create the final dcfclk and uclk table - while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } else { - if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } else { - j = num_uclk_states; - } - } - } - - while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } - - while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && - optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } - - dcn3_2_soc.num_states = num_states; - for (i = 0; i < dcn3_2_soc.num_states; i++) { - dcn3_2_soc.clock_limits[i].state = i; - dcn3_2_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; - dcn3_2_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; - - /* Fill all states with max values of all these clocks */ - dcn3_2_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; - dcn3_2_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; - dcn3_2_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_2_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; - - /* Populate from bw_params for DTBCLK, SOCCLK */ - if (i > 0) { - if (!bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_2_soc.clock_limits[i].dtbclk_mhz = dcn3_2_soc.clock_limits[i-1].dtbclk_mhz; - } else { - dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - - if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) - dcn3_2_soc.clock_limits[i].socclk_mhz = dcn3_2_soc.clock_limits[i-1].socclk_mhz; - else - dcn3_2_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; - - if (!dram_speed_mts[i] && i > 0) - dcn3_2_soc.clock_limits[i].dram_speed_mts = dcn3_2_soc.clock_limits[i-1].dram_speed_mts; - else - dcn3_2_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; - - /* These clocks cannot come from bw_params, always fill from dcn3_2_soc[0] */ - /* PHYCLK_D18, PHYCLK_D32 */ - dcn3_2_soc.clock_limits[i].phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_2_soc.clock_limits[i].phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; - } - } else { - build_synthetic_soc_states(bw_params, dcn3_2_soc.clock_limits, &dcn3_2_soc.num_states); - } - - /* Re-init DML with updated bb */ - dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); - if (dc->current_state) - 
dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32); - } } static struct resource_funcs dcn32_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 7c60a954737b9..9175fe1f9be30 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1772,3 +1772,473 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, } +static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, + unsigned int *optimal_dcfclk, + unsigned int *optimal_fclk) +{ + double bw_from_dram, bw_from_dram1, bw_from_dram2; + + bw_from_dram1 = uclk_mts * dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_dram_bw_use_normal_percent / 100); + bw_from_dram2 = uclk_mts * dcn3_2_soc.num_chans * + dcn3_2_soc.dram_channel_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100); + + bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; + + if (optimal_fclk) + *optimal_fclk = bw_from_dram / + (dcn3_2_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); + + if (optimal_dcfclk) + *optimal_dcfclk = bw_from_dram / + (dcn3_2_soc.return_bus_width_bytes * (dcn3_2_soc.max_avg_sdp_bw_use_normal_percent / 100)); +} + +static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, + unsigned int index) +{ + int i; + + if (*num_entries == 0) + return; + + for (i = index; i < *num_entries - 1; i++) { + table[i] = table[i + 1]; + } + memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); +} + +static int build_synthetic_soc_states(struct clk_bw_params *bw_params, + struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + int i, j; + struct _vcs_dpi_voltage_scaling_st entry = {0}; + + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; + + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + + static const unsigned int num_dcfclk_stas = 5; + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; + + unsigned int num_uclk_dpms = 0; + unsigned int num_fclk_dpms = 0; + unsigned int num_dcfclk_dpms = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) + max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) + max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + + if (bw_params->clk_table.entries[i].memclk_mhz > 0) + num_uclk_dpms++; + if (bw_params->clk_table.entries[i].fclk_mhz > 0) + num_fclk_dpms++; + if 
(bw_params->clk_table.entries[i].dcfclk_mhz > 0) + num_dcfclk_dpms++; + } + + if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) + return -1; + + if (max_dppclk_mhz == 0) + max_dppclk_mhz = max_dispclk_mhz; + + if (max_fclk_mhz == 0) + max_fclk_mhz = max_dcfclk_mhz * dcn3_2_soc.pct_ideal_sdp_bw_after_urgent / dcn3_2_soc.pct_ideal_fabric_bw_after_urgent; + + if (max_phyclk_mhz == 0) + max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; + + *num_entries = 0; + entry.dispclk_mhz = max_dispclk_mhz; + entry.dscclk_mhz = max_dispclk_mhz / 3; + entry.dppclk_mhz = max_dppclk_mhz; + entry.dtbclk_mhz = max_dtbclk_mhz; + entry.phyclk_mhz = max_phyclk_mhz; + entry.phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz; + entry.phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz; + + // Insert all the DCFCLK STAs + for (i = 0; i < num_dcfclk_stas; i++) { + entry.dcfclk_mhz = dcfclk_sta_targets[i]; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + + // Insert the max DCFCLK + entry.dcfclk_mhz = max_dcfclk_mhz; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + + // Insert the UCLK DPMS + for (i = 0; i < num_uclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + + // If FCLK is coarse grained, insert individual DPMs. + if (num_fclk_dpms > 2) { + for (i = 0; i < num_fclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + } + // If FCLK fine grained, only insert max + else { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = max_fclk_mhz; + entry.dram_speed_mts = 0; + + DC_FP_START(); + insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); + } + + // At this point, the table contains all "points of interest" based on + // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock + // ratios (by derate, are exact). + + // Remove states that require higher clocks than are supported + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz > max_dcfclk_mhz || + table[i].fabricclk_mhz > max_fclk_mhz || + table[i].dram_speed_mts > max_uclk_mhz * 16) + remove_entry_from_table_at_index(table, num_entries, i); + } + + // At this point, the table only contains supported points of interest + // it could be used as is, but some states may be redundant due to + // coarse grained nature of some clocks, so we want to round up to + // coarse grained DPMs and remove duplicates. 
+ + // Round up UCLKs + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_uclk_dpms; j++) { + if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { + table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + // If FCLK is coarse grained, round up to next DPMs + if (num_fclk_dpms > 2) { + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_fclk_dpms; j++) { + if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { + table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; + break; + } + } + } + } + // Otherwise, round up to minimum. + else { + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].fabricclk_mhz < min_fclk_mhz) { + table[i].fabricclk_mhz = min_fclk_mhz; + break; + } + } + } + + // Round DCFCLKs up to minimum + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz < min_dcfclk_mhz) { + table[i].dcfclk_mhz = min_dcfclk_mhz; + break; + } + } + + // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. + i = 0; + while (i < *num_entries - 1) { + if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && + table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && + table[i].dram_speed_mts == table[i + 1].dram_speed_mts) + remove_entry_from_table_at_index(table, num_entries, i + 1); + else + i++; + } + + // Fix up the state indicies + for (i = *num_entries - 1; i >= 0 ; i--) { + table[i].state = i; + } + + return 0; +} + +/** + * dcn32_update_bw_bounding_box + * + * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from + * spreadsheet with actual values as per dGPU SKU: + * - with passed few options from dc->config + * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might + * need to get it from PM FW) + * - with passed latency values (passed in ns units) in dc-> bb override for + * debugging purposes + * - with passed latencies from VBIOS (in 100_ns units) if available for + * certain dGPU SKU + * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU + * of the same ASIC) + * - clocks levels with passed clk_table entries from Clk Mgr as reported by PM + * FW for different clocks (which might differ for certain dGPU SKU of the + * same ASIC) + */ +void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + /* Overrides from dc->config options */ + dcn3_2_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; + + /* Override from passed dc->bb_overrides if available*/ + if ((int)(dcn3_2_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns + && dc->bb_overrides.sr_exit_time_ns) { + dcn3_2_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.sr_enter_plus_exit_time_us * 1000) + != dc->bb_overrides.sr_enter_plus_exit_time_ns + && dc->bb_overrides.sr_enter_plus_exit_time_ns) { + dcn3_2_soc.sr_enter_plus_exit_time_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns + && dc->bb_overrides.urgent_latency_ns) { + dcn3_2_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.dram_clock_change_latency_us * 1000) + != dc->bb_overrides.dram_clock_change_latency_ns + && dc->bb_overrides.dram_clock_change_latency_ns) { + 
dcn3_2_soc.dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + } + + if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000) + != dc->bb_overrides.dummy_clock_change_latency_ns + && dc->bb_overrides.dummy_clock_change_latency_ns) { + dcn3_2_soc.dummy_pstate_latency_us = + dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; + } + + /* Override from VBIOS if VBIOS bb_info available */ + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + if (bb_info.dram_clock_change_latency_100ns > 0) + dcn3_2_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; + + if (bb_info.dram_sr_enter_exit_latency_100ns > 0) + dcn3_2_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; + + if (bb_info.dram_sr_exit_latency_100ns > 0) + dcn3_2_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; + } + } + + /* Override from VBIOS for num_chan */ + if (dc->ctx->dc_bios->vram_info.num_chans) + dcn3_2_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; + + if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) + dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; + + } + + /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ + dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */ + if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { + if (dc->debug.use_legacy_soc_bb_mechanism) { + unsigned int i = 0, j = 0, num_states = 0; + + unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; + unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; + unsigned int min_dcfclk = UINT_MAX; + /* Set 199 as first value in STA target array to have a minimum DCFCLK value. 
+ * For DCN32 we set min to 199 so minimum FCLK DPM0 (300Mhz can be achieved) */ + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; + unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dcfclk_mhz != 0 && + bw_params->clk_table.entries[i].dcfclk_mhz < min_dcfclk) + min_dcfclk = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + } + if (min_dcfclk > dcfclk_sta_targets[0]) + dcfclk_sta_targets[0] = min_dcfclk; + if (!max_dcfclk_mhz) + max_dcfclk_mhz = dcn3_2_soc.clock_limits[0].dcfclk_mhz; + if (!max_dispclk_mhz) + max_dispclk_mhz = dcn3_2_soc.clock_limits[0].dispclk_mhz; + if (!max_dppclk_mhz) + max_dppclk_mhz = dcn3_2_soc.clock_limits[0].dppclk_mhz; + if (!max_phyclk_mhz) + max_phyclk_mhz = dcn3_2_soc.clock_limits[0].phyclk_mhz; + + if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array + dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; + num_dcfclk_sta_targets++; + } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates + for (i = 0; i < num_dcfclk_sta_targets; i++) { + if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { + dcfclk_sta_targets[i] = max_dcfclk_mhz; + break; + } + } + // Update size of array since we "removed" duplicates + num_dcfclk_sta_targets = i + 1; + } + + num_uclk_states = bw_params->clk_table.num_entries; + + // Calculate optimal dcfclk for each uclk + for (i = 0; i < num_uclk_states; i++) { + dcn32_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, + &optimal_dcfclk_for_uclk[i], NULL); + if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { + optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; + } + } + + // Calculate optimal uclk for each dcfclk sta target + for (i = 0; i < num_dcfclk_sta_targets; i++) { + for (j = 0; j < num_uclk_states; j++) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { + optimal_uclk_for_dcfclk_sta_targets[i] = + bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + i = 0; + j = 0; + // create the final dcfclk and uclk table + while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } else { + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } else { + j = 
num_uclk_states;
+				}
+			}
+
+			while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) {
+				dcfclk_mhz[num_states] = dcfclk_sta_targets[i];
+				dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++];
+			}
+
+			while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES &&
+					optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) {
+				dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j];
+				dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16;
+			}
+
+			dcn3_2_soc.num_states = num_states;
+			for (i = 0; i < dcn3_2_soc.num_states; i++) {
+				dcn3_2_soc.clock_limits[i].state = i;
+				dcn3_2_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
+				dcn3_2_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
+
+				/* Fill all states with max values of all these clocks */
+				dcn3_2_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
+				dcn3_2_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
+				dcn3_2_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
+				dcn3_2_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3;
+
+				/* Populate from bw_params for DTBCLK, SOCCLK */
+				if (i > 0) {
+					if (!bw_params->clk_table.entries[i].dtbclk_mhz) {
+						dcn3_2_soc.clock_limits[i].dtbclk_mhz = dcn3_2_soc.clock_limits[i-1].dtbclk_mhz;
+					} else {
+						dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+					}
+				} else if (bw_params->clk_table.entries[i].dtbclk_mhz) {
+					dcn3_2_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+				}
+
+				if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+					dcn3_2_soc.clock_limits[i].socclk_mhz = dcn3_2_soc.clock_limits[i-1].socclk_mhz;
+				else
+					dcn3_2_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
+
+				if (!dram_speed_mts[i] && i > 0)
+					dcn3_2_soc.clock_limits[i].dram_speed_mts = dcn3_2_soc.clock_limits[i-1].dram_speed_mts;
+				else
+					dcn3_2_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
+
+				/* These clocks cannot come from bw_params, always fill from dcn3_2_soc[0] */
+				/* PHYCLK_D18, PHYCLK_D32 */
+				dcn3_2_soc.clock_limits[i].phyclk_d18_mhz = dcn3_2_soc.clock_limits[0].phyclk_d18_mhz;
+				dcn3_2_soc.clock_limits[i].phyclk_d32_mhz = dcn3_2_soc.clock_limits[0].phyclk_d32_mhz;
+			}
+		} else {
+			build_synthetic_soc_states(bw_params, dcn3_2_soc.clock_limits, &dcn3_2_soc.num_states);
+		}
+
+		/* Re-init DML with updated bb */
+		dml_init_instance(&dc->dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32);
+		if (dc->current_state)
+			dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_2_soc, &dcn3_2_ip, DML_PROJECT_DCN32);
+	}
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 56973debc3483..3ed06ab855bed 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -69,4 +69,6 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 				int pipe_cnt,
 				int vlevel);
 
+void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params);
+
 #endif

From f7907f6db81ca08f4f8bd9245b8fecd49bc82f6e Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Fri, 8 Jul 2022 12:08:34 -0400
Subject: [PATCH 082/134] drm/amd/display: Drop FPU flags from dcn32 Makefile

This is the final commit of the FPU isolation work for DCN32, so we can
finally remove the FPU-related compiler flags.
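For context, the pattern that makes the flags removable is the one the
previous patch already put in place: files under dcn32/ stay
FPU-flag-free and only reach the isolated float math through the kernel
FPU guards. A short recap sketch of that calling convention (the wrapper
below is the real dcn32 entry point from the previous patch, reproduced
here purely as illustration, not as new code in this commit):

	/* dcn32_resource.c: now compiled without -mhard-float/-msse */
	static void dcn32_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
	{
		DC_FP_START();	/* enter kernel-mode FPU context */
		dcn32_update_bw_bounding_box_fpu(dc, bw_params);	/* lives in dml/dcn32/dcn32_fpu.c */
		DC_FP_END();	/* leave FPU context before returning */
	}
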
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/Makefile | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile index 932d85fa4262b..e943b643ab6ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn32/Makefile @@ -15,34 +15,6 @@ DCN32 = dcn32_resource.o dcn32_hubbub.o dcn32_hwseq.o dcn32_init.o \ dcn32_dio_stream_encoder.o dcn32_dio_link_encoder.o dcn32_hpo_dp_link_encoder.o \ dcn32_resource_helpers.o dcn32_mpc.o -ifdef CONFIG_X86 -dcn32_ccflags := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -dcn32_ccflags := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -dcn32_ccflags += -mpreferred-stack-boundary=4 -else -dcn32_ccflags += -msse2 -endif -endif - -CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource_helpers.o := $(dcn32_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dcn32/dcn32_resource.o := $(dcn32_ccflags) - AMD_DAL_DCN32 = $(addprefix $(AMDDALPATH)/dc/dcn32/,$(DCN32)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN32) From 197485c695430ab03d9f4a96aab4d4f76787f7c9 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 15:53:56 -0400 Subject: [PATCH 083/134] drm/amd/display: Create dcn321_fpu file The file dcn321_resource has a lot of FPU operations that should be inside the dml folder. This commit introduces the dcn321_fpu file and moves some of the FPU operation functions to this new file. 
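The same contract as in the dcn32 series applies to the moved helpers:
callers outside the dml folder bracket each call with the FPU guards, as
the diff below does around dcn321_insert_entry_into_table_sorted(), and
the helper itself is expected to assert that it runs in an FPU-safe
context. A minimal sketch of both sides of that contract (the
dc_assert_fp_enabled() call in the helper body is an assumption that
mirrors the dcn32 version, since the new file is truncated below before
its function bodies):

	/* dcn321_resource.c: caller side, built without FPU flags */
	DC_FP_START();
	dcn321_insert_entry_into_table_sorted(table, num_entries, &entry);
	DC_FP_END();

	/* dml/dcn321/dcn321_fpu.c: assumed helper shape */
	void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table,
						   unsigned int *num_entries,
						   struct _vcs_dpi_voltage_scaling_st *entry)
	{
		dc_assert_fp_enabled();	/* must be reached between DC_FP_START/END */
		/* sorted insert keyed on net bandwidth, as in the dcn32 variant */
	}
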
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn321/dcn321_resource.c | 225 ++--------------- .../amd/display/dc/dcn321/dcn321_resource.h | 3 + drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 + .../amd/display/dc/dml/dcn321/dcn321_fpu.c | 238 ++++++++++++++++++ .../amd/display/dc/dml/dcn321/dcn321_fpu.h | 36 +++ 5 files changed, 296 insertions(+), 208 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index d218c6dd71aa7..6619bcb30de70 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -37,6 +37,8 @@ #include "dcn20/dcn20_resource.h" #include "dcn30/dcn30_resource.h" +#include "dml/dcn321/dcn321_fpu.h" + #include "dcn10/dcn10_ipp.h" #include "dcn30/dcn30_hubbub.h" #include "dcn31/dcn31_hubbub.h" @@ -120,134 +122,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define fixed16_to_double(x) (((double)x) / ((double) (1 << 16))) #define fixed16_to_double_to_cpu(x) fixed16_to_double(le32_to_cpu(x)) -#define DCN3_2_DEFAULT_DET_SIZE 256 - -struct _vcs_dpi_ip_params_st dcn3_21_ip = { - .gpuvm_enable = 0, - .gpuvm_max_page_table_levels = 4, - .hostvm_enable = 0, - .rob_buffer_size_kbytes = 128, - .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1280, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 22, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .alpha_pixel_chunk_size_kbytes = 4, // not appearing in spreadsheet, match c code from hw team - .min_pixel_chunk_size_bytes = 1024, - .dcc_meta_buffer_size_bytes = 6272, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 12, - .maximum_pixels_per_line_per_dsc_unit = 6016, - .dsc422_native_support = true, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 57, - .line_buffer_size_bits = 1171920, //DPP doc, DCN3_2_DisplayMode_73.xlsm still shows as 986880 bits with 48 bpp - .max_line_buffer_lines = 32, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 47, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 28, - 
.dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 125, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, - .max_num_dp2p0_outputs = 2, - .max_num_dp2p0_streams = 4, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { - .clock_limits = { - { - .state = 0, - .dcfclk_mhz = 1564.0, - .fabricclk_mhz = 400.0, - .dispclk_mhz = 2150.0, - .dppclk_mhz = 2150.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .phyclk_d32_mhz = 625.0, - .socclk_mhz = 1200.0, - .dscclk_mhz = 716.667, - .dram_speed_mts = 1600.0, - .dtbclk_mhz = 1564.0, - }, - }, - .num_states = 1, - .sr_exit_time_us = 5.20, - .sr_enter_plus_exit_time_us = 9.60, - .sr_exit_z8_time_us = 285.0, - .sr_enter_plus_exit_z8_time_us = 320, - .writeback_latency_us = 12.0, - .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .fclk_change_latency_us = 20, - .usr_retraining_latency_us = 2, - .smn_latency_us = 2, - .mall_allocated_for_dcn_mbytes = 64, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 100.0, - .pct_ideal_fabric_bw_after_urgent = 67.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented - .pct_ideal_dram_bw_after_urgent_strobe = 67.0, - .max_avg_sdp_bw_use_normal_percent = 80.0, - .max_avg_fabric_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_strobe_percent = 50.0, - .max_avg_dram_bw_use_normal_percent = 15.0, - .num_chans = 8, - .dram_channel_width_bytes = 2, - .fabric_datapath_to_dcn_data_return_bytes = 64, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .dram_clock_change_latency_us = 400, - .dispclk_dppclk_vco_speed_mhz = 4300.0, - .do_urgent_latency_adjustment = true, - .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, -}; - enum dcn321_clk_src_array_id { DCN321_CLK_SRC_PLL0, DCN321_CLK_SRC_PLL1, @@ -1719,76 +1593,6 @@ static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, (dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); } -static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) -{ - if (entry->dcfclk_mhz > 0) { - float bw_on_sdp = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); - - entry->fabricclk_mhz = bw_on_sdp / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_sdp / (dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->fabricclk_mhz > 0) { - float bw_on_fabric = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); - - entry->dcfclk_mhz = bw_on_fabric / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); - entry->dram_speed_mts = bw_on_fabric / 
(dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); - } else if (entry->dram_speed_mts > 0) { - float bw_on_dram = entry->dram_speed_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - entry->fabricclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); - entry->dcfclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); - } -} - -static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) -{ - float memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); - - float fabric_bw_kbytes_sec = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); - - float sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); - - float limiting_bw_kbytes_sec = memory_bw_kbytes_sec; - - if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) - limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; - - if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) - limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; - - return limiting_bw_kbytes_sec; -} - -static void insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - struct _vcs_dpi_voltage_scaling_st *entry) -{ - int index = 0; - int i = 0; - float net_bw_of_new_state = 0; - - if (*num_entries == 0) { - table[0] = *entry; - (*num_entries)++; - } else { - net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); - while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { - index++; - if (index >= *num_entries) - break; - } - - for (i = *num_entries; i > index; i--) { - table[i] = table[i - 1]; - } - - table[index] = *entry; - (*num_entries)++; - } -} - static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, unsigned int index) { @@ -1872,8 +1676,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_START(); + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // Insert the max DCFCLK @@ -1881,8 +1686,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_START(); + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); // Insert the UCLK DPMS for (i = 0; i < num_uclk_dpms; i++) { @@ -1890,8 +1696,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = 0; entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_START(); + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // If FCLK is coarse grained, insert individual DPMs. 
@@ -1901,8 +1708,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_START(); + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } } // If FCLK fine grained, only insert max @@ -1911,8 +1719,9 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params, entry.fabricclk_mhz = max_fclk_mhz; entry.dram_speed_mts = 0; - get_optimal_ntuple(&entry); - insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_START(); + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + DC_FP_END(); } // At this point, the table contains all "points of interest" based on diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h index 2732085a0e884..82cbf009f2d38 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.h @@ -31,6 +31,9 @@ #define TO_DCN321_RES_POOL(pool)\ container_of(pool, struct dcn321_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_21_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc; + struct dcn321_resource_pool { struct resource_pool base; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 01cb0ef3a2b02..359f6e9a1da04 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -76,6 +76,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) @@ -126,6 +127,7 @@ DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o DML += dcn32/display_mode_vba_32.o dcn32/display_rq_dlg_calc_32.o dcn32/display_mode_vba_util_32.o DML += dcn31/dcn31_fpu.o DML += dcn32/dcn32_fpu.o +DML += dcn321/dcn321_fpu.o DML += dcn301/dcn301_fpu.o DML += dcn302/dcn302_fpu.o DML += dcn303/dcn303_fpu.o diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c new file mode 100644 index 0000000000000..78408698985bd --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "resource.h" +#include "dcn321_fpu.h" +#include "dcn32/dcn32_resource.h" +#include "dcn321/dcn321_resource.h" + +#define DCN3_2_DEFAULT_DET_SIZE 256 + +struct _vcs_dpi_ip_params_st dcn3_21_ip = { + .gpuvm_enable = 0, + .gpuvm_max_page_table_levels = 4, + .hostvm_enable = 0, + .rob_buffer_size_kbytes = 128, + .det_buffer_size_kbytes = DCN3_2_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1280, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 22, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .alpha_pixel_chunk_size_kbytes = 4, + .min_pixel_chunk_size_bytes = 1024, + .dcc_meta_buffer_size_bytes = 6272, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 12, + .maximum_pixels_per_line_per_dsc_unit = 6016, + .dsc422_native_support = true, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 57, + .line_buffer_size_bits = 1171920, + .max_line_buffer_lines = 32, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 47, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 28, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 125, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, + .max_num_dp2p0_outputs = 2, + .max_num_dp2p0_streams = 4, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { + .clock_limits = { + { + .state = 0, + .dcfclk_mhz = 1564.0, + .fabricclk_mhz = 400.0, + .dispclk_mhz = 2150.0, + .dppclk_mhz = 2150.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .phyclk_d32_mhz = 625.0, + .socclk_mhz = 1200.0, + .dscclk_mhz = 716.667, + .dram_speed_mts = 1600.0, + .dtbclk_mhz = 1564.0, + }, + }, + .num_states = 1, + .sr_exit_time_us = 5.20, + .sr_enter_plus_exit_time_us = 9.60, + .sr_exit_z8_time_us = 285.0, + .sr_enter_plus_exit_z8_time_us = 320, + .writeback_latency_us = 12.0, + .round_trip_ping_latency_dcfclk_cycles = 
263, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .fclk_change_latency_us = 20, + .usr_retraining_latency_us = 2, + .smn_latency_us = 2, + .mall_allocated_for_dcn_mbytes = 64, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 100.0, + .pct_ideal_fabric_bw_after_urgent = 67.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 20.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, // N/A, for now keep as is until DML implemented + .pct_ideal_dram_bw_after_urgent_strobe = 67.0, + .max_avg_sdp_bw_use_normal_percent = 80.0, + .max_avg_fabric_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_strobe_percent = 50.0, + .max_avg_dram_bw_use_normal_percent = 15.0, + .num_chans = 8, + .dram_channel_width_bytes = 2, + .fabric_datapath_to_dcn_data_return_bytes = 64, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .dram_clock_change_latency_us = 400, + .dispclk_dppclk_vco_speed_mhz = 4300.0, + .do_urgent_latency_adjustment = true, + .urgent_latency_adjustment_fabric_clock_component_us = 1.0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, +}; + +static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) +{ + if (entry->dcfclk_mhz > 0) { + float bw_on_sdp = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); + + entry->fabricclk_mhz = bw_on_sdp / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_sdp / (dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->fabricclk_mhz > 0) { + float bw_on_fabric = entry->fabricclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); + + entry->dcfclk_mhz = bw_on_fabric / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); + entry->dram_speed_mts = bw_on_fabric / (dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100)); + } else if (entry->dram_speed_mts > 0) { + float bw_on_dram = entry->dram_speed_mts * dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + entry->fabricclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100)); + entry->dcfclk_mhz = bw_on_dram / (dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100)); + } +} + +static float calculate_net_bw_in_kbytes_sec(struct _vcs_dpi_voltage_scaling_st *entry) +{ + float memory_bw_kbytes_sec; + float fabric_bw_kbytes_sec; + float sdp_bw_kbytes_sec; + float limiting_bw_kbytes_sec; + + memory_bw_kbytes_sec = entry->dram_speed_mts * dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * ((float)dcn3_21_soc.pct_ideal_dram_sdp_bw_after_urgent_pixel_only / 100); + + fabric_bw_kbytes_sec = entry->fabricclk_mhz * 
dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_fabric_bw_after_urgent / 100); + + sdp_bw_kbytes_sec = entry->dcfclk_mhz * dcn3_21_soc.return_bus_width_bytes * ((float)dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / 100); + + limiting_bw_kbytes_sec = memory_bw_kbytes_sec; + + if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = fabric_bw_kbytes_sec; + + if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec) + limiting_bw_kbytes_sec = sdp_bw_kbytes_sec; + + return limiting_bw_kbytes_sec; +} + +void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry) +{ + int i = 0; + int index = 0; + float net_bw_of_new_state = 0; + + dc_assert_fp_enabled(); + + get_optimal_ntuple(entry); + + if (*num_entries == 0) { + table[0] = *entry; + (*num_entries)++; + } else { + net_bw_of_new_state = calculate_net_bw_in_kbytes_sec(entry); + while (net_bw_of_new_state > calculate_net_bw_in_kbytes_sec(&table[index])) { + index++; + if (index >= *num_entries) + break; + } + + for (i = *num_entries; i > index; i--) + table[i] = table[i - 1]; + + table[index] = *entry; + (*num_entries)++; + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h new file mode 100644 index 0000000000000..5b6b28526e180 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN32_FPU_H__ +#define __DCN32_FPU_H__ + +#include "dml/display_mode_vba.h" + +void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, + unsigned int *num_entries, + struct _vcs_dpi_voltage_scaling_st *entry); + +#endif From 352b25a770712a954b1def1b8c6429b0825ee0b3 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Fri, 8 Jul 2022 16:07:03 -0400 Subject: [PATCH 084/134] drm/amd/display: Drop FPU code from dcn321 resource This commit fully move the missing FPU operations from dcn321 resource to dcn321 fpu. It also remove those FPU flags from the Makefile. 
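The calling convention this leaves behind keeps dcn321_resource.c free
of float math and enters the FPU-safe context only around the call into
DML; as a sketch, using the names this patch introduces:

  static void dcn321_update_bw_bounding_box(struct dc *dc,
                                            struct clk_bw_params *bw_params)
  {
          DC_FP_START();  /* on x86 this maps to kernel_fpu_begin() */
          dcn321_update_bw_bounding_box_fpu(dc, bw_params);
          DC_FP_END();
  }

The moved helper opens with dc_assert_fp_enabled(), so any call made
outside such a protected region is caught at runtime.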
Tested-by: Daniel Wheeler Reviewed-by: Harry Wentland Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn321/Makefile | 25 - .../amd/display/dc/dcn321/dcn321_resource.c | 452 +----------------- .../amd/display/dc/dml/dcn321/dcn321_fpu.c | 446 +++++++++++++++++ .../amd/display/dc/dml/dcn321/dcn321_fpu.h | 2 + 4 files changed, 450 insertions(+), 475 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile index e554fd6c16f21..0a199c83bb5b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn321/Makefile @@ -12,31 +12,6 @@ DCN321 = dcn321_resource.o dcn321_dio_link_encoder.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn321/dcn321_resource.o += -msse2 -endif -endif - AMD_DAL_DCN321 = $(addprefix $(AMDDALPATH)/dc/dcn321/,$(DCN321)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN321) diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 6619bcb30de70..9ac0fcf79bedf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -1570,459 +1570,11 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; - -static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, - unsigned int *optimal_dcfclk, - unsigned int *optimal_fclk) -{ - double bw_from_dram, bw_from_dram1, bw_from_dram2; - - bw_from_dram1 = uclk_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100); - bw_from_dram2 = uclk_mts * dcn3_21_soc.num_chans * - dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100); - - bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? 
bw_from_dram1 : bw_from_dram2; - - if (optimal_fclk) - *optimal_fclk = bw_from_dram / - (dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); - - if (optimal_dcfclk) - *optimal_dcfclk = bw_from_dram / - (dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); -} - -static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, - unsigned int index) -{ - int i; - - if (*num_entries == 0) - return; - - for (i = index; i < *num_entries - 1; i++) { - table[i] = table[i + 1]; - } - memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); -} - -static int build_synthetic_soc_states(struct clk_bw_params *bw_params, - struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - int i, j; - struct _vcs_dpi_voltage_scaling_st entry = {0}; - - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, - max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; - - unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; - - static const unsigned int num_dcfclk_stas = 5; - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; - - unsigned int num_uclk_dpms = 0; - unsigned int num_fclk_dpms = 0; - unsigned int num_dcfclk_dpms = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) - max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) - max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) - max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - - if (bw_params->clk_table.entries[i].memclk_mhz > 0) - num_uclk_dpms++; - if (bw_params->clk_table.entries[i].fclk_mhz > 0) - num_fclk_dpms++; - if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) - num_dcfclk_dpms++; - } - - if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) - return -1; - - if (max_dppclk_mhz == 0) - max_dppclk_mhz = max_dispclk_mhz; - - if (max_fclk_mhz == 0) - max_fclk_mhz = max_dcfclk_mhz * dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / dcn3_21_soc.pct_ideal_fabric_bw_after_urgent; - - if (max_phyclk_mhz == 0) - max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; - - *num_entries = 0; - entry.dispclk_mhz = max_dispclk_mhz; - entry.dscclk_mhz = max_dispclk_mhz / 3; - entry.dppclk_mhz = max_dppclk_mhz; - entry.dtbclk_mhz = max_dtbclk_mhz; - entry.phyclk_mhz = max_phyclk_mhz; - entry.phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; - entry.phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; - - // Insert all the DCFCLK STAs - for (i = 0; i < num_dcfclk_stas; i++) { - entry.dcfclk_mhz = dcfclk_sta_targets[i]; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - - 
DC_FP_START(); - dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // Insert the max DCFCLK - entry.dcfclk_mhz = max_dcfclk_mhz; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = 0; - DC_FP_START(); - dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + dcn321_update_bw_bounding_box_fpu(dc, bw_params); DC_FP_END(); - - // Insert the UCLK DPMS - for (i = 0; i < num_uclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = 0; - entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; - - DC_FP_START(); - dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // If FCLK is coarse grained, insert individual DPMs. - if (num_fclk_dpms > 2) { - for (i = 0; i < num_fclk_dpms; i++) { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; - entry.dram_speed_mts = 0; - - DC_FP_START(); - dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - } - // If FCLK fine grained, only insert max - else { - entry.dcfclk_mhz = 0; - entry.fabricclk_mhz = max_fclk_mhz; - entry.dram_speed_mts = 0; - - DC_FP_START(); - dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); - DC_FP_END(); - } - - // At this point, the table contains all "points of interest" based on - // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock - // ratios (by derate, are exact). - - // Remove states that require higher clocks than are supported - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz > max_dcfclk_mhz || - table[i].fabricclk_mhz > max_fclk_mhz || - table[i].dram_speed_mts > max_uclk_mhz * 16) - remove_entry_from_table_at_index(table, num_entries, i); - } - - // At this point, the table only contains supported points of interest - // it could be used as is, but some states may be redundant due to - // coarse grained nature of some clocks, so we want to round up to - // coarse grained DPMs and remove duplicates. - - // Round up UCLKs - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_uclk_dpms; j++) { - if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { - table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - // If FCLK is coarse grained, round up to next DPMs - if (num_fclk_dpms > 2) { - for (i = *num_entries - 1; i >= 0 ; i--) { - for (j = 0; j < num_fclk_dpms; j++) { - if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { - table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; - break; - } - } - } - } - // Otherwise, round up to minimum. - else { - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].fabricclk_mhz < min_fclk_mhz) { - table[i].fabricclk_mhz = min_fclk_mhz; - break; - } - } - } - - // Round DCFCLKs up to minimum - for (i = *num_entries - 1; i >= 0 ; i--) { - if (table[i].dcfclk_mhz < min_dcfclk_mhz) { - table[i].dcfclk_mhz = min_dcfclk_mhz; - break; - } - } - - // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. 
- i = 0; - while (i < *num_entries - 1) { - if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && - table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && - table[i].dram_speed_mts == table[i + 1].dram_speed_mts) - remove_entry_from_table_at_index(table, num_entries, i + 1); - else - i++; - } - - // Fix up the state indicies - for (i = *num_entries - 1; i >= 0 ; i--) { - table[i].state = i; - } - - return 0; -} - -/* dcn321_update_bw_bounding_box - * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from spreadsheet - * with actual values as per dGPU SKU: - * -with passed few options from dc->config - * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW) - * -with passed latency values (passed in ns units) in dc-> bb override for debugging purposes - * -with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU - * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC) - * -clocks levels with passed clk_table entries from Clk Mgr as reported by PM FW for different - * clocks (which might differ for certain dGPU SKU of the same ASIC) - */ -static void dcn321_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { - /* Overrides from dc->config options */ - dcn3_21_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - - /* Override from passed dc->bb_overrides if available*/ - if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns - && dc->bb_overrides.sr_exit_time_ns) { - dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; - } - - if ((int)(dcn3_21_soc.sr_enter_plus_exit_time_us * 1000) - != dc->bb_overrides.sr_enter_plus_exit_time_ns - && dc->bb_overrides.sr_enter_plus_exit_time_ns) { - dcn3_21_soc.sr_enter_plus_exit_time_us = - dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; - } - - if ((int)(dcn3_21_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns - && dc->bb_overrides.urgent_latency_ns) { - dcn3_21_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; - } - - if ((int)(dcn3_21_soc.dram_clock_change_latency_us * 1000) - != dc->bb_overrides.dram_clock_change_latency_ns - && dc->bb_overrides.dram_clock_change_latency_ns) { - dcn3_21_soc.dram_clock_change_latency_us = - dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; - } - - if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000) - != dc->bb_overrides.dummy_clock_change_latency_ns - && dc->bb_overrides.dummy_clock_change_latency_ns) { - dcn3_21_soc.dummy_pstate_latency_us = - dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; - } - - /* Override from VBIOS if VBIOS bb_info available */ - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = {0}; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - if (bb_info.dram_clock_change_latency_100ns > 0) - dcn3_21_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; - - if (bb_info.dram_sr_enter_exit_latency_100ns > 0) - dcn3_21_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; - - if (bb_info.dram_sr_exit_latency_100ns > 0) - dcn3_21_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; - } - } - - /* Override from VBIOS for num_chan */ - if (dc->ctx->dc_bios->vram_info.num_chans) - dcn3_21_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; - - if 
(dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) - dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; - - } - - /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ - dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - - /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */ - if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { - if (dc->debug.use_legacy_soc_bb_mechanism) { - unsigned int i = 0, j = 0, num_states = 0; - - unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; - unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; - unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; - - unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {615, 906, 1324, 1564}; - unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; - unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; - - for (i = 0; i < MAX_NUM_DPM_LVL; i++) { - if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) - max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; - if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; - if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; - if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) - max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; - } - if (!max_dcfclk_mhz) - max_dcfclk_mhz = dcn3_21_soc.clock_limits[0].dcfclk_mhz; - if (!max_dispclk_mhz) - max_dispclk_mhz = dcn3_21_soc.clock_limits[0].dispclk_mhz; - if (!max_dppclk_mhz) - max_dppclk_mhz = dcn3_21_soc.clock_limits[0].dppclk_mhz; - if (!max_phyclk_mhz) - max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; - - if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array - dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; - num_dcfclk_sta_targets++; - } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { - // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates - for (i = 0; i < num_dcfclk_sta_targets; i++) { - if (dcfclk_sta_targets[i] > max_dcfclk_mhz) { - dcfclk_sta_targets[i] = max_dcfclk_mhz; - break; - } - } - // Update size of array since we "removed" duplicates - num_dcfclk_sta_targets = i + 1; - } - - num_uclk_states = bw_params->clk_table.num_entries; - - // Calculate optimal dcfclk for each uclk - for (i = 0; i < num_uclk_states; i++) { - dcn321_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, - &optimal_dcfclk_for_uclk[i], NULL); - if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { - optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; - } - } - - // Calculate optimal uclk for each dcfclk sta target - for (i = 0; i < num_dcfclk_sta_targets; i++) { - for (j = 0; j < num_uclk_states; j++) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { - optimal_uclk_for_dcfclk_sta_targets[i] = - bw_params->clk_table.entries[j].memclk_mhz * 16; - break; - } - } - } - - i = 0; - j = 0; - // create the 
final dcfclk and uclk table - while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { - if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } else { - if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } else { - j = num_uclk_states; - } - } - } - - while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { - dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; - dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; - } - - while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && - optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { - dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; - dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; - } - - dcn3_21_soc.num_states = num_states; - for (i = 0; i < dcn3_21_soc.num_states; i++) { - dcn3_21_soc.clock_limits[i].state = i; - dcn3_21_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; - dcn3_21_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; - - /* Fill all states with max values of all these clocks */ - dcn3_21_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; - dcn3_21_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; - dcn3_21_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_21_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; - - /* Populate from bw_params for DTBCLK, SOCCLK */ - if (i > 0) { - if (!bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_21_soc.clock_limits[i].dtbclk_mhz = dcn3_21_soc.clock_limits[i-1].dtbclk_mhz; - } else { - dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { - dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; - } - - if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) - dcn3_21_soc.clock_limits[i].socclk_mhz = dcn3_21_soc.clock_limits[i-1].socclk_mhz; - else - dcn3_21_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; - - if (!dram_speed_mts[i] && i > 0) - dcn3_21_soc.clock_limits[i].dram_speed_mts = dcn3_21_soc.clock_limits[i-1].dram_speed_mts; - else - dcn3_21_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; - - /* These clocks cannot come from bw_params, always fill from dcn3_21_soc[0] */ - /* PHYCLK_D18, PHYCLK_D32 */ - dcn3_21_soc.clock_limits[i].phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_21_soc.clock_limits[i].phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; - } - } else { - build_synthetic_soc_states(bw_params, dcn3_21_soc.clock_limits, &dcn3_21_soc.num_states); - } - - /* Re-init DML with updated bb */ - dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); - if (dc->current_state) - dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); - } } static struct resource_funcs dcn321_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 78408698985bd..6e72336b79755 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -24,6 +24,7 @@ * 
*/ +#include "clk_mgr.h" #include "resource.h" #include "dcn321_fpu.h" #include "dcn32/dcn32_resource.h" @@ -236,3 +237,448 @@ void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *t } } +static void remove_entry_from_table_at_index(struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries, + unsigned int index) +{ + int i; + + if (*num_entries == 0) + return; + + for (i = index; i < *num_entries - 1; i++) { + table[i] = table[i + 1]; + } + memset(&table[--(*num_entries)], 0, sizeof(struct _vcs_dpi_voltage_scaling_st)); +} + +static int build_synthetic_soc_states(struct clk_bw_params *bw_params, + struct _vcs_dpi_voltage_scaling_st *table, unsigned int *num_entries) +{ + int i, j; + struct _vcs_dpi_voltage_scaling_st entry = {0}; + + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, + max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0; + + unsigned int min_dcfclk_mhz = 199, min_fclk_mhz = 299; + + static const unsigned int num_dcfclk_stas = 5; + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {199, 615, 906, 1324, 1564}; + + unsigned int num_uclk_dpms = 0; + unsigned int num_fclk_dpms = 0; + unsigned int num_dcfclk_dpms = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].fclk_mhz > max_fclk_mhz) + max_fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + if (bw_params->clk_table.entries[i].memclk_mhz > max_uclk_mhz) + max_uclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + if (bw_params->clk_table.entries[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + + if (bw_params->clk_table.entries[i].memclk_mhz > 0) + num_uclk_dpms++; + if (bw_params->clk_table.entries[i].fclk_mhz > 0) + num_fclk_dpms++; + if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) + num_dcfclk_dpms++; + } + + if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dtbclk_mhz) + return -1; + + if (max_dppclk_mhz == 0) + max_dppclk_mhz = max_dispclk_mhz; + + if (max_fclk_mhz == 0) + max_fclk_mhz = max_dcfclk_mhz * dcn3_21_soc.pct_ideal_sdp_bw_after_urgent / dcn3_21_soc.pct_ideal_fabric_bw_after_urgent; + + if (max_phyclk_mhz == 0) + max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; + + *num_entries = 0; + entry.dispclk_mhz = max_dispclk_mhz; + entry.dscclk_mhz = max_dispclk_mhz / 3; + entry.dppclk_mhz = max_dppclk_mhz; + entry.dtbclk_mhz = max_dtbclk_mhz; + entry.phyclk_mhz = max_phyclk_mhz; + entry.phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; + entry.phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; + + // Insert all the DCFCLK STAs + for (i = 0; i < num_dcfclk_stas; i++) { + entry.dcfclk_mhz = dcfclk_sta_targets[i]; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = 0; + + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + } + + // Insert the max DCFCLK + entry.dcfclk_mhz = max_dcfclk_mhz; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = 0; + + 
dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + + // Insert the UCLK DPMS + for (i = 0; i < num_uclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = 0; + entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16; + + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + } + + // If FCLK is coarse grained, insert individual DPMs. + if (num_fclk_dpms > 2) { + for (i = 0; i < num_fclk_dpms; i++) { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; + entry.dram_speed_mts = 0; + + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + } + } + // If FCLK fine grained, only insert max + else { + entry.dcfclk_mhz = 0; + entry.fabricclk_mhz = max_fclk_mhz; + entry.dram_speed_mts = 0; + + dcn321_insert_entry_into_table_sorted(table, num_entries, &entry); + } + + // At this point, the table contains all "points of interest" based on + // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock + // ratios (by derate, are exact). + + // Remove states that require higher clocks than are supported + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz > max_dcfclk_mhz || + table[i].fabricclk_mhz > max_fclk_mhz || + table[i].dram_speed_mts > max_uclk_mhz * 16) + remove_entry_from_table_at_index(table, num_entries, i); + } + + // At this point, the table only contains supported points of interest + // it could be used as is, but some states may be redundant due to + // coarse grained nature of some clocks, so we want to round up to + // coarse grained DPMs and remove duplicates. + + // Round up UCLKs + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_uclk_dpms; j++) { + if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) { + table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + // If FCLK is coarse grained, round up to next DPMs + if (num_fclk_dpms > 2) { + for (i = *num_entries - 1; i >= 0 ; i--) { + for (j = 0; j < num_fclk_dpms; j++) { + if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) { + table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; + break; + } + } + } + } + // Otherwise, round up to minimum. + else { + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].fabricclk_mhz < min_fclk_mhz) { + table[i].fabricclk_mhz = min_fclk_mhz; + break; + } + } + } + + // Round DCFCLKs up to minimum + for (i = *num_entries - 1; i >= 0 ; i--) { + if (table[i].dcfclk_mhz < min_dcfclk_mhz) { + table[i].dcfclk_mhz = min_dcfclk_mhz; + break; + } + } + + // Remove duplicate states, note duplicate states are always neighbouring since table is sorted. 
+ i = 0; + while (i < *num_entries - 1) { + if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz && + table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz && + table[i].dram_speed_mts == table[i + 1].dram_speed_mts) + remove_entry_from_table_at_index(table, num_entries, i + 1); + else + i++; + } + + // Fix up the state indicies + for (i = *num_entries - 1; i >= 0 ; i--) { + table[i].state = i; + } + + return 0; +} + +static void dcn321_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, + unsigned int *optimal_dcfclk, + unsigned int *optimal_fclk) +{ + double bw_from_dram, bw_from_dram1, bw_from_dram2; + + bw_from_dram1 = uclk_mts * dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_dram_bw_use_normal_percent / 100); + bw_from_dram2 = uclk_mts * dcn3_21_soc.num_chans * + dcn3_21_soc.dram_channel_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100); + + bw_from_dram = (bw_from_dram1 < bw_from_dram2) ? bw_from_dram1 : bw_from_dram2; + + if (optimal_fclk) + *optimal_fclk = bw_from_dram / + (dcn3_21_soc.fabric_datapath_to_dcn_data_return_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); + + if (optimal_dcfclk) + *optimal_dcfclk = bw_from_dram / + (dcn3_21_soc.return_bus_width_bytes * (dcn3_21_soc.max_avg_sdp_bw_use_normal_percent / 100)); +} + +/** dcn321_update_bw_bounding_box + * This would override some dcn3_2 ip_or_soc initial parameters hardcoded from spreadsheet + * with actual values as per dGPU SKU: + * -with passed few options from dc->config + * -with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW) + * -with passed latency values (passed in ns units) in dc-> bb override for debugging purposes + * -with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU + * -with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC) + * -clocks levels with passed clk_table entries from Clk Mgr as reported by PM FW for different + * clocks (which might differ for certain dGPU SKU of the same ASIC) + */ +void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + /* Overrides from dc->config options */ + dcn3_21_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk; + + /* Override from passed dc->bb_overrides if available*/ + if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns + && dc->bb_overrides.sr_exit_time_ns) { + dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_21_soc.sr_enter_plus_exit_time_us * 1000) + != dc->bb_overrides.sr_enter_plus_exit_time_ns + && dc->bb_overrides.sr_enter_plus_exit_time_ns) { + dcn3_21_soc.sr_enter_plus_exit_time_us = + dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0; + } + + if ((int)(dcn3_21_soc.urgent_latency_us * 1000) != dc->bb_overrides.urgent_latency_ns + && dc->bb_overrides.urgent_latency_ns) { + dcn3_21_soc.urgent_latency_us = dc->bb_overrides.urgent_latency_ns / 1000.0; + } + + if ((int)(dcn3_21_soc.dram_clock_change_latency_us * 1000) + != dc->bb_overrides.dram_clock_change_latency_ns + && dc->bb_overrides.dram_clock_change_latency_ns) { + dcn3_21_soc.dram_clock_change_latency_us = + dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; + } + + if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000) + != dc->bb_overrides.dummy_clock_change_latency_ns + && 
dc->bb_overrides.dummy_clock_change_latency_ns) { + dcn3_21_soc.dummy_pstate_latency_us = + dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; + } + + /* Override from VBIOS if VBIOS bb_info available */ + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + if (bb_info.dram_clock_change_latency_100ns > 0) + dcn3_21_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; + + if (bb_info.dram_sr_enter_exit_latency_100ns > 0) + dcn3_21_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; + + if (bb_info.dram_sr_exit_latency_100ns > 0) + dcn3_21_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; + } + } + + /* Override from VBIOS for num_chan */ + if (dc->ctx->dc_bios->vram_info.num_chans) + dcn3_21_soc.num_chans = dc->ctx->dc_bios->vram_info.num_chans; + + if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes) + dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes; + + } + + /* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */ + dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + /* Overrides Clock levelsfrom CLK Mgr table entries as reported by PM FW */ + if ((!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) && (bw_params->clk_table.entries[0].memclk_mhz)) { + if (dc->debug.use_legacy_soc_bb_mechanism) { + unsigned int i = 0, j = 0, num_states = 0; + + unsigned int dcfclk_mhz[DC__VOLTAGE_STATES] = {0}; + unsigned int dram_speed_mts[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_uclk_for_dcfclk_sta_targets[DC__VOLTAGE_STATES] = {0}; + unsigned int optimal_dcfclk_for_uclk[DC__VOLTAGE_STATES] = {0}; + + unsigned int dcfclk_sta_targets[DC__VOLTAGE_STATES] = {615, 906, 1324, 1564}; + unsigned int num_dcfclk_sta_targets = 4, num_uclk_states = 0; + unsigned int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0; + + for (i = 0; i < MAX_NUM_DPM_LVL; i++) { + if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; + if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; + if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; + if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; + } + if (!max_dcfclk_mhz) + max_dcfclk_mhz = dcn3_21_soc.clock_limits[0].dcfclk_mhz; + if (!max_dispclk_mhz) + max_dispclk_mhz = dcn3_21_soc.clock_limits[0].dispclk_mhz; + if (!max_dppclk_mhz) + max_dppclk_mhz = dcn3_21_soc.clock_limits[0].dppclk_mhz; + if (!max_phyclk_mhz) + max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz; + + if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array + dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz; + num_dcfclk_sta_targets++; + } else if (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { + // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates + for (i = 0; i < num_dcfclk_sta_targets; i++) { + if 
(dcfclk_sta_targets[i] > max_dcfclk_mhz) { + dcfclk_sta_targets[i] = max_dcfclk_mhz; + break; + } + } + // Update size of array since we "removed" duplicates + num_dcfclk_sta_targets = i + 1; + } + + num_uclk_states = bw_params->clk_table.num_entries; + + // Calculate optimal dcfclk for each uclk + for (i = 0; i < num_uclk_states; i++) { + dcn321_get_optimal_dcfclk_fclk_for_uclk(bw_params->clk_table.entries[i].memclk_mhz * 16, + &optimal_dcfclk_for_uclk[i], NULL); + if (optimal_dcfclk_for_uclk[i] < bw_params->clk_table.entries[0].dcfclk_mhz) { + optimal_dcfclk_for_uclk[i] = bw_params->clk_table.entries[0].dcfclk_mhz; + } + } + + // Calculate optimal uclk for each dcfclk sta target + for (i = 0; i < num_dcfclk_sta_targets; i++) { + for (j = 0; j < num_uclk_states; j++) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j]) { + optimal_uclk_for_dcfclk_sta_targets[i] = + bw_params->clk_table.entries[j].memclk_mhz * 16; + break; + } + } + } + + i = 0; + j = 0; + // create the final dcfclk and uclk table + while (i < num_dcfclk_sta_targets && j < num_uclk_states && num_states < DC__VOLTAGE_STATES) { + if (dcfclk_sta_targets[i] < optimal_dcfclk_for_uclk[j] && i < num_dcfclk_sta_targets) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } else { + if (j < num_uclk_states && optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } else { + j = num_uclk_states; + } + } + } + + while (i < num_dcfclk_sta_targets && num_states < DC__VOLTAGE_STATES) { + dcfclk_mhz[num_states] = dcfclk_sta_targets[i]; + dram_speed_mts[num_states++] = optimal_uclk_for_dcfclk_sta_targets[i++]; + } + + while (j < num_uclk_states && num_states < DC__VOLTAGE_STATES && + optimal_dcfclk_for_uclk[j] <= max_dcfclk_mhz) { + dcfclk_mhz[num_states] = optimal_dcfclk_for_uclk[j]; + dram_speed_mts[num_states++] = bw_params->clk_table.entries[j++].memclk_mhz * 16; + } + + dcn3_21_soc.num_states = num_states; + for (i = 0; i < dcn3_21_soc.num_states; i++) { + dcn3_21_soc.clock_limits[i].state = i; + dcn3_21_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i]; + dcn3_21_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i]; + + /* Fill all states with max values of all these clocks */ + dcn3_21_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; + dcn3_21_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; + dcn3_21_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; + dcn3_21_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3; + + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (i > 0) { + if (!bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_21_soc.clock_limits[i].dtbclk_mhz = dcn3_21_soc.clock_limits[i-1].dtbclk_mhz; + } else { + dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + } else if (bw_params->clk_table.entries[i].dtbclk_mhz) { + dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + } + + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_21_soc.clock_limits[i].socclk_mhz = dcn3_21_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_21_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; + + if (!dram_speed_mts[i] && i > 0) + dcn3_21_soc.clock_limits[i].dram_speed_mts = dcn3_21_soc.clock_limits[i-1].dram_speed_mts; + else + dcn3_21_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i]; + + /* These clocks 
cannot come from bw_params, always fill from dcn3_21_soc[0] */ + /* PHYCLK_D18, PHYCLK_D32 */ + dcn3_21_soc.clock_limits[i].phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz; + dcn3_21_soc.clock_limits[i].phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz; + } + } else { + build_synthetic_soc_states(bw_params, dcn3_21_soc.clock_limits, &dcn3_21_soc.num_states); + } + + /* Re-init DML with updated bb */ + dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); + if (dc->current_state) + dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); + } +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h index 5b6b28526e180..e8fad9b4be693 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.h @@ -33,4 +33,6 @@ void dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *t unsigned int *num_entries, struct _vcs_dpi_voltage_scaling_st *entry); +void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); + #endif From 9a1dadb6d33173427b6f6c89ab1ddd0eca636afb Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 11 Jul 2022 00:43:58 -0400 Subject: [PATCH 085/134] drm/amd/display: 3.2.195 This version brings along following fixes: - Isolate FPU operation for DCN32/321 under the DML folder - Create a specific file for CRTC and plane based on amdgpu_dm - Fix DSC issues - Update DML logic Acked-by: Alan Liu Signed-off-by: Aric Cyr Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index be41f9fcf1ddc..d05bbe193bfa7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.194" +#define DC_VER "3.2.195" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 39a6f3fe98ff03baab364acfec69a6f9803ce22c Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Wed, 20 Jul 2022 18:32:04 -0100 Subject: [PATCH 086/134] drm/amd/display: fix soft-fp vs hard-fp on DCN 3.1 family for powerpc Move remaining FPU code to DML folder that caused compilation error for powerpc. 
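The DML folder can host this code because its Makefile already builds
each *_fpu.o with hard-float flags on a per-file basis; roughly (an
abridged sketch — the exact dml_ccflags definition lives in
dc/dml/Makefile):

  ifdef CONFIG_PPC64
  dml_ccflags := -mhard-float -maltivec
  endif
  CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags)

Objects built outside dc/dml stay soft-float, and mixing the two ABI
tags in one link is exactly what the powerpc linker rejects.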
This patch depends on [1] to prevent the error below: /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o uses soft float /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: failed to merge target specific data of file drivers/gpu/drm/amd/amdgpu/../display/dc/dcn31/dcn31_resource.o /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o uses soft float /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: failed to merge target specific data of file drivers/gpu/drm/amd/amdgpu/../display/dc/dcn315/dcn315_resource.o /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/dcn316/dcn316_resource.o uses soft float /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: failed to merge target specific data of file drivers/gpu/drm/amd/amdgpu/../display/dc/dcn316/dcn316_resource.o [1] https://lore.kernel.org/amd-gfx/20220716195144.342960-1-mwen@igalia.com/ Reported-by: Guenter Roeck Signed-off-by: Melissa Wen Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 5 +++-- .../gpu/drm/amd/display/dc/dcn315/dcn315_resource.c | 5 +++-- .../gpu/drm/amd/display/dc/dcn316/dcn316_resource.c | 5 +++-- drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 11 +++++++++++ drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 3 +++ 5 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 178d40c0d70ae..929b712cbada3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1663,11 +1663,12 @@ int dcn31_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.immediate_flip = true; pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); if (dc->debug.dml_hostvm_override == DML_HOSTVM_NO_OVERRIDE) pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c index df2abd8fe2eb3..1a5f5977f962b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c @@ -1658,11 +1658,12 @@ static int dcn315_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + 
DC_FP_END(); if (pipes[pipe_cnt].dout.dsc_enable) { switch (timing->display_color_depth) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c index 070fe10a004e5..53dea466348fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c @@ -1661,11 +1661,12 @@ static int dcn316_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.dcc_rate = 3; pipes[pipe_cnt].dout.dsc_input_bpc = 0; + DC_FP_START(); + dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); if (pipes[pipe_cnt].dout.dsc_enable) { switch (timing->display_color_depth) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index facac3daeaca6..e36cfa5985ea9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -435,8 +435,19 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; +void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + dc_assert_fp_enabled(); + + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; +} + void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) { + dc_assert_fp_enabled(); + if (dc->clk_mgr->bw_params->wm_table.entries[WM_A].valid) { context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].pstate_latency_us; context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.entries[WM_A].sr_enter_plus_exit_time_us; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h index 0a10de80c1a42..4372f17b55d4e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -31,6 +31,9 @@ #define DCN3_15_MIN_COMPBUF_SIZE_KB 128 #define DCN3_16_DEFAULT_DET_SIZE 192 +void dcn31_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt); + void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); void dcn31_calculate_wm_and_dlg_fp( From ca0273ab447853c014983951e2183991f8dbc7b9 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Wed, 20 Jul 2022 18:32:05 -0100 Subject: [PATCH 087/134] drm/amd/display: remove useless FPU protection wrapper from dcn31_resource file Many lines of code in dcn31_resource_construct are wrapped by DC_FP macro to protect FPU operations; however, there is no FPU in this region. Therefore, just remove the wrapper for clarity. 
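Keeping DC_FP_START()/DC_FP_END() tight also matters beyond clarity: on
x86 DC_FP_START() ends up in kernel_fpu_begin(), which runs the
protected section with preemption disabled, so the region should cover
only the float math itself, e.g. (illustrative sketch, following the
pattern used elsewhere in this series):

  DC_FP_START();
  dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
  DC_FP_END();

Wrapping all of dcn31_resource_construct() stretched that window across
hundreds of lines of integer-only setup code.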
Signed-off-by: Melissa Wen Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 929b712cbada3..6d25fcf865bfc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1863,8 +1863,6 @@ static bool dcn31_resource_construct( struct dc_context *ctx = dc->ctx; struct irq_service_init_data init_data; - DC_FP_START(); - ctx->dc_bios->regs = &bios_regs; pool->base.res_cap = &res_cap_dcn31; @@ -2175,13 +2173,9 @@ static bool dcn31_resource_construct( dc->dcn_ip->max_num_dpp = dcn3_1_ip.max_num_dpp; - DC_FP_END(); - return true; create_fail: - - DC_FP_END(); dcn31_resource_destruct(pool); return false; From 1a3408259bfd5a383b3c7aa7c309c9fb81e35ce0 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Wed, 20 Jul 2022 18:32:06 -0100 Subject: [PATCH 088/134] drm/amd/display: move FPU code on dcn21 clk_mgr The -mno-gnu-attribute option in dcn21 clk mgr makefile hides a soft vs hard fp error for powerpc. After removing this flag, we can see some FPU code remains there: /gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn21/rn_clk_mgr.o uses soft float Therefore, remove the -mno-gnu-attribute flag for dcn21/powerpc and move FPU-associated code to DML folder. Signed-off-by: Melissa Wen Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 6 - .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 234 +---------------- .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h | 7 + .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 235 ++++++++++++++++++ .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.h | 2 + 5 files changed, 248 insertions(+), 236 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index a48453612d10c..66dc02c426e95 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -107,12 +107,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN201) ############################################################################### CLK_MGR_DCN21 = rn_clk_mgr.o rn_clk_mgr_vbios_smu.o -# prevent build errors regarding soft-float vs hard-float FP ABI tags -# this code is currently unused on ppc64, as it applies to Renoir APUs only -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) -endif - AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index cf1b5f354ae99..0202dc682682b 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -26,10 +26,9 @@ #include "dccg.h" #include "clk_mgr_internal.h" - #include "dcn20/dcn20_clk_mgr.h" #include "rn_clk_mgr.h" - +#include "dml/dcn20/dcn20_fpu.h" #include "dce100/dce_clk_mgr.h" #include "rn_clk_mgr_vbios_smu.h" @@ -45,7 +44,6 @@ /* Constants */ -#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter 
Requirement doc */ #define SMU_VER_55_51_0 0x373300 /* SMU Version that is able to set DISPCLK below 100MHz */ /* Macros */ @@ -613,228 +611,6 @@ static struct clk_bw_params rn_bw_params = { }; -static struct wm_table ddr4_wm_table_gs = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 7.09, - .sr_enter_plus_exit_time_us = 8.14, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - } -}; - -static struct wm_table lpddr4_wm_table_gs = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 5.32, - .sr_enter_plus_exit_time_us = 6.38, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, - .valid = true, - }, - } -}; - -static struct wm_table lpddr4_wm_table_with_disabled_ppt = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 8.32, - .sr_enter_plus_exit_time_us = 9.38, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, - .valid = true, - }, - } -}; - -static struct wm_table ddr4_wm_table_rn = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 11.90, - .sr_enter_plus_exit_time_us = 12.80, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.18, - .sr_enter_plus_exit_time_us = 14.30, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.18, - .sr_enter_plus_exit_time_us = 14.30, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.18, - .sr_enter_plus_exit_time_us = 14.30, - .valid = true, - }, - } -}; - -static struct wm_table ddr4_1R_wm_table_rn = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - { - 
.wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 13.90, - .sr_enter_plus_exit_time_us = 14.80, - .valid = true, - }, - } -}; - -static struct wm_table lpddr4_wm_table_rn = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 7.32, - .sr_enter_plus_exit_time_us = 8.38, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, - .valid = true, - }, - } -}; - static unsigned int find_socclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage) { int i; @@ -914,12 +690,10 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params /* * WM set D will be re-purposed for memory retraining */ - bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY; - bw_params->wm_table.entries[WM_D].wm_inst = WM_D; - bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; - bw_params->wm_table.entries[WM_D].valid = true; + DC_FP_START(); + dcn21_clk_mgr_set_bw_params_wm_table(bw_params); + DC_FP_END(); } - } void rn_clk_mgr_construct( diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h index e4322fa5475b6..2e088c5171b28 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h @@ -29,6 +29,13 @@ #include "clk_mgr.h" #include "dm_pp_smu.h" +extern struct wm_table ddr4_wm_table_gs; +extern struct wm_table lpddr4_wm_table_gs; +extern struct wm_table lpddr4_wm_table_with_disabled_ppt; +extern struct wm_table ddr4_wm_table_rn; +extern struct wm_table ddr4_1R_wm_table_rn; +extern struct wm_table lpddr4_wm_table_rn; + struct rn_clk_registers { uint32_t CLK1_CLK0_CURRENT_CNT; /* DPREFCLK */ }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index dc60b835e9382..eeeae52fe6fc7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -42,6 +42,9 @@ #define MIN(X, Y) ((X) < (Y) ? 
(X) : (Y)) #endif +/* Constant */ +#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */ + /** * DOC: DCN2x FPU manipulation Overview * @@ -650,6 +653,228 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .num_states = 8 }; +struct wm_table ddr4_wm_table_gs = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 7.09, + .sr_enter_plus_exit_time_us = 8.14, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + } +}; + +struct wm_table lpddr4_wm_table_gs = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 5.32, + .sr_enter_plus_exit_time_us = 6.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + +struct wm_table lpddr4_wm_table_with_disabled_ppt = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 8.32, + .sr_enter_plus_exit_time_us = 9.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + +struct wm_table ddr4_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 11.90, + .sr_enter_plus_exit_time_us = 12.80, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.18, + .sr_enter_plus_exit_time_us = 14.30, + .valid = true, + }, + } +}; + +struct wm_table ddr4_1R_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + 
.sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 13.90, + .sr_enter_plus_exit_time_us = 14.80, + .valid = true, + }, + } +}; + +struct wm_table lpddr4_wm_table_rn = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 7.32, + .sr_enter_plus_exit_time_us = 8.38, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.82, + .sr_enter_plus_exit_time_us = 11.196, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.89, + .sr_enter_plus_exit_time_us = 11.24, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 9.748, + .sr_enter_plus_exit_time_us = 11.102, + .valid = true, + }, + } +}; + void dcn20_populate_dml_writeback_from_context(struct dc *dc, struct resource_context *res_ctx, display_e2e_pipe_params_st *pipes) @@ -2068,3 +2293,13 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21); } + +void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params) +{ + dc_assert_fp_enabled(); + + bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY; + bw_params->wm_table.entries[WM_D].wm_inst = WM_D; + bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; + bw_params->wm_table.entries[WM_D].valid = true; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h index aa892193e4854..a6e1ad0f38e9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h @@ -82,4 +82,6 @@ bool dcn21_validate_bandwidth_fp(struct dc *dc, bool fast_validate); void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params); + #endif /* __DCN20_FPU_H__ */ From ddd0fa1f47edd794736e70d165341a5c4ff78e47 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Wed, 20 Jul 2022 18:32:07 -0100 Subject: [PATCH 089/134] drm/amd/display: move FPU code from dcn30 clk mgr to DML folder The -mno-gnu-attribute option in clk mgr makefile for dcn30 hides a soft vs hard fp error for powerpc. After removing this flag, we can see some FPU code remains there: gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.o uses soft float Therefore, remove the -mno-gnu-attribute flag for dcn30/powerpc and move FPU-associated code to DML folder. 
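The mechanism behind the quoted failure: on powerpc64, GCC records each object's FP ABI in a .gnu.attribute section, and the linker refuses to merge objects whose tags conflict; -mno-gnu-attribute merely suppressed the tag, hiding the mix instead of fixing it. A rough stand-alone sketch of what flips the tag (function names are illustrative, not from the tree):

  #include <stdio.h>

  /* integer-only code: safe for the soft-float clk_mgr objects */
  static int khz_to_mhz(int khz)
  {
          return khz / 1000;
  }

  /* using double is roughly what marks the containing object hard-float on
   * powerpc64, so helpers like this belong in the DML unit, the only one
   * built with FPU flags */
  static double adjust_latency_us(double us, int factor)
  {
          return us * (double)factor;
  }

  int main(void)
  {
          printf("%d MHz, %f us\n", khz_to_mhz(48000), adjust_latency_us(11.72, 2));
          return 0;
  }

Splitting the code by translation unit, rather than keeping the flag, lets the linker check catch any future FPU code that leaks back into clk_mgr.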
Signed-off-by: Melissa Wen Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 6 -- .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 63 ++----------------- .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.c | 63 ++++++++++++++++++- .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.h | 1 + 4 files changed, 68 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 66dc02c426e95..15b660a951a57 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -115,12 +115,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) ############################################################################### CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o -# prevent build errors regarding soft-float vs hard-float FP ABI tags -# this code is currently unused on ppc64, as it applies to VanGogh APUs only -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) -endif - AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 914708cefc795..3ce0ee0d012f3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -29,6 +29,7 @@ #include "dcn20/dcn20_clk_mgr.h" #include "dce100/dce_clk_mgr.h" #include "dcn30/dcn30_clk_mgr.h" +#include "dml/dcn30/dcn30_fpu.h" #include "reg_helper.h" #include "core_types.h" #include "dm_helpers.h" @@ -97,65 +98,11 @@ static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, uint32_t cl } } -static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr) +static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr) { - /* defaults */ - double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us; - double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us; - double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us; - uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz; - - /* Set A - Normal - default values*/ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set B - Performance - higher minimum clocks */ -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; -// 
clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE; -// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; - - /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; - clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = 1600; - clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; - clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = 8000; - clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; - clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = 10000; - clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; - clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = 16000; - clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; - - /* Set D - MALL - SR enter and exit times adjusted for MALL */ - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; + DC_FP_START(); + dcn3_fpu_build_wm_range_table(&clk_mgr->base); + DC_FP_END(); } void dcn3_init_clocks(struct clk_mgr *clk_mgr_base) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c index a8db1306750e5..c00f759fddedf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -29,7 +29,7 @@ #include "dcn20/dcn20_resource.h" #include 
"dcn30/dcn30_resource.h" - +#include "clk_mgr/dcn30/dcn30_smu11_driver_if.h" #include "display_mode_vba_30.h" #include "dcn30_fpu.h" @@ -616,4 +616,65 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc, } +void dcn3_fpu_build_wm_range_table(struct clk_mgr *base) +{ + /* defaults */ + double pstate_latency_us = base->ctx->dc->dml.soc.dram_clock_change_latency_us; + double sr_exit_time_us = base->ctx->dc->dml.soc.sr_exit_time_us; + double sr_enter_plus_exit_time_us = base->ctx->dc->dml.soc.sr_enter_plus_exit_time_us; + uint16_t min_uclk_mhz = base->bw_params->clk_table.entries[0].memclk_mhz; + dc_assert_fp_enabled(); + + /* Set A - Normal - default values*/ + base->bw_params->wm_table.nv_entries[WM_A].valid = true; + base->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us; + base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz; + base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set B - Performance - higher minimum clocks */ +// base->bw_params->wm_table.nv_entries[WM_B].valid = true; +// base->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us; +// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us; +// base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE; +// base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF; + + /* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */ + base->bw_params->wm_table.nv_entries[WM_C].valid = true; + base->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0; + base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; + base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; + base->bw_params->dummy_pstate_table[0].dram_speed_mts = 1600; + base->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; + base->bw_params->dummy_pstate_table[1].dram_speed_mts = 8000; + base->bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; + base->bw_params->dummy_pstate_table[2].dram_speed_mts = 10000; + base->bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8; + 
base->bw_params->dummy_pstate_table[3].dram_speed_mts = 16000; + base->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5; + + /* Set D - MALL - SR enter and exit times adjusted for MALL */ + base->bw_params->wm_table.nv_entries[WM_D].valid = true; + base->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us; + base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2; + base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; + base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h index dedfe7b5f1731..c2024052a4977 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h @@ -63,5 +63,6 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc, unsigned int *dcfclk_mhz, unsigned int *dram_speed_mts); +void dcn3_fpu_build_wm_range_table(struct clk_mgr *base); #endif /* __DCN30_FPU_H__*/ From 5085e0361f5a3675fc0d0919f7b69aded453ceb7 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Wed, 20 Jul 2022 18:32:08 -0100 Subject: [PATCH 090/134] drm/amd/display: move FPU code from dcn301 clk mgr to DML folder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The -mno-gnu-attribute option in dcn301 clk mgr makefile hides a soft vs hard fp error for powerpc. After removing this flag, we can see some FPU code remains there: gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses hard float, drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn301/vg_clk_mgr.o uses soft float Therefore, remove the -mno-gnu-attribute flag for dcn301/powerpc and move FPU-associated code to DML folder. 
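As the vg_clk_mgr diff below shows, the LPDDR retraining setup collapses into a call to the shared dcn21_clk_mgr_set_bw_params_wm_table() helper under DC_FP_START()/DC_FP_END(), the same helper the Renoir clk mgr now uses, keeping the 4.977 double literal out of both soft-float objects. A reduced stand-alone sketch of the helper's effect, with stubbed types (only the WM_D field assignments and the constant come from the diffs; the real helper also calls dc_assert_fp_enabled()):

  #include <stdio.h>

  enum wm_set_id { WM_A, WM_B, WM_C, WM_D, WM_COUNT };
  enum wm_type { WM_TYPE_PSTATE_CHG, WM_TYPE_RETRAINING };

  struct wm_entry {
          enum wm_set_id wm_inst;
          enum wm_type wm_type;
          double pstate_latency_us;
          int valid;
  };

  struct clk_bw_params {
          struct wm_entry entries[WM_COUNT];
  };

  #define LPDDR_MEM_RETRAIN_LATENCY 4.977  /* double literal: FPU territory */

  /* stand-in for the shared DML-side helper both clk mgrs call */
  static void set_bw_params_wm_table(struct clk_bw_params *bw_params)
  {
          bw_params->entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY;
          bw_params->entries[WM_D].wm_inst = WM_D;
          bw_params->entries[WM_D].wm_type = WM_TYPE_RETRAINING;
          bw_params->entries[WM_D].valid = 1;
  }

  int main(void)
  {
          struct clk_bw_params params = { 0 };

          set_bw_params_wm_table(&params);  /* kernel callers bracket this with DC_FP_START()/DC_FP_END() */
          printf("WM_D latency: %f us, valid=%d\n",
                 params.entries[WM_D].pstate_latency_us,
                 params.entries[WM_D].valid);
          return 0;
  }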
Signed-off-by: Melissa Wen Reviewed-by: Maíra Canal Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 6 -- .../display/dc/clk_mgr/dcn301/vg_clk_mgr.c | 85 ++----------------- .../display/dc/clk_mgr/dcn301/vg_clk_mgr.h | 3 + .../amd/display/dc/dml/dcn301/dcn301_fpu.c | 74 ++++++++++++++++ 4 files changed, 83 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index 15b660a951a57..271d8e573181c 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -123,12 +123,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) ############################################################################### CLK_MGR_DCN301 = vg_clk_mgr.o dcn301_smu.o -# prevent build errors regarding soft-float vs hard-float FP ABI tags -# this code is currently unused on ppc64, as it applies to VanGogh APUs only -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) -endif - AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index f310b0d25a076..24715ca2fa944 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -32,6 +32,9 @@ // For dcn20_update_clocks_update_dpp_dto #include "dcn20/dcn20_clk_mgr.h" +// For DML FPU code +#include "dml/dcn20/dcn20_fpu.h" + #include "vg_clk_mgr.h" #include "dcn301_smu.h" #include "reg_helper.h" @@ -526,81 +529,6 @@ static struct clk_bw_params vg_bw_params = { }; -static struct wm_table ddr4_wm_table = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 6.09, - .sr_enter_plus_exit_time_us = 7.14, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, - .valid = true, - }, - } -}; - -static struct wm_table lpddr5_wm_table = { - .entries = { - { - .wm_inst = WM_A, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, - .valid = true, - }, - { - .wm_inst = WM_B, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, - .valid = true, - }, - { - .wm_inst = WM_C, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, - .valid = true, - }, - { - .wm_inst = WM_D, - .wm_type = WM_TYPE_PSTATE_CHG, - .pstate_latency_us = 11.65333, - .sr_exit_time_us = 13.5, - .sr_enter_plus_exit_time_us = 16.5, - .valid = true, - }, - } -}; - - static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_table, unsigned int voltage) { @@ -670,10 +598,9 @@ static void 
vg_clk_mgr_helper_populate_bw_params( /* * WM set D will be re-purposed for memory retraining */ - bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY; - bw_params->wm_table.entries[WM_D].wm_inst = WM_D; - bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; - bw_params->wm_table.entries[WM_D].valid = true; + DC_FP_START(); + dcn21_clk_mgr_set_bw_params_wm_table(bw_params); + DC_FP_END(); } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h index 7255477307f13..75884f5729891 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.h @@ -29,6 +29,9 @@ struct watermarks; +extern struct wm_table ddr4_wm_table; +extern struct wm_table lpddr5_wm_table; + struct smu_watermark_set { struct watermarks *wm_set; union large_integer mc_address; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index e4863f0bf0f61..7ef66e511ec8e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -214,6 +214,80 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_01_soc = { .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, }; +struct wm_table ddr4_wm_table = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 6.09, + .sr_enter_plus_exit_time_us = 7.14, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.72, + .sr_exit_time_us = 10.12, + .sr_enter_plus_exit_time_us = 11.48, + .valid = true, + }, + } +}; + +struct wm_table lpddr5_wm_table = { + .entries = { + { + .wm_inst = WM_A, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + { + .wm_inst = WM_B, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + { + .wm_inst = WM_C, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + { + .wm_inst = WM_D, + .wm_type = WM_TYPE_PSTATE_CHG, + .pstate_latency_us = 11.65333, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, + .valid = true, + }, + } +}; + static void calculate_wm_set_for_vlevel(int vlevel, struct wm_range_table_entry *table_entry, struct dcn_watermarks *wm_set, From 77299956e8867a4cc19c5fd41b797c5152aad1dd Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 21 Jul 2022 17:21:12 -0400 Subject: [PATCH 091/134] drm/amd/display: Fix dc_version detect for dcn314 [Why] While parsing dc_version redundant check leads to invalid dc_version for dcn314. 
[How] Remove redundant check Fixes: ee7b62e127c8 ("drm/amd/display: Enable DCN314 in DC") Signed-off-by: Roman Li Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index bdaad4ce4b2dc..752ba4ab2b1e3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -74,6 +74,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) { enum dce_version dc_version = DCE_VERSION_UNKNOWN; + switch (asic_id.chip_family) { #if defined(CONFIG_DRM_AMD_DC_SI) @@ -169,8 +170,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) dc_version = DCN_VERSION_3_21; break; case AMDGPU_FAMILY_GC_11_0_2: - if (ASICREV_IS_GC_11_0_2(asic_id.hw_internal_rev)) - dc_version = DCN_VERSION_3_14; + dc_version = DCN_VERSION_3_14; break; default: dc_version = DCE_VERSION_UNKNOWN; From 1c05d9e53d2cfad1f99d9087a8ad24da4ebc0905 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 21 Jul 2022 17:52:23 -0400 Subject: [PATCH 092/134] drm/amd/display: Specify supported modifiers for dcn314 Use same modifiers as for AMDGPU_FAMILY_GC_11_0_0 Signed-off-by: Roman Li Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 4702a53a0bf9c..8cd25b2ea0dca 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -1412,6 +1412,7 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane, } break; case AMDGPU_FAMILY_GC_11_0_0: + case AMDGPU_FAMILY_GC_11_0_2: switch (AMD_FMT_MOD_GET(TILE, modifier)) { case AMD_FMT_MOD_TILE_GFX11_256K_R_X: case AMD_FMT_MOD_TILE_GFX9_64K_R_X: From 0593ad215359d51514c1e6c81ce28ea598efed6b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 20 Jul 2022 18:00:45 -0400 Subject: [PATCH 093/134] drm/amdkfd: Correct mmu_notifier_get failure handling If process has signal pending, mmu_notifier_get_locked fails and calls ops->free_notifier, kfd_process_free_notifier will schedule kfd_process_wq_release as process refcount is 1, but process structure is already freed. This use after free bug causes system crash with different backtrace. The fix is to increase process refcount and then decrease the refcount after mmu_notifier_get success. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index fc38a4d81420d..d8591721270b2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1405,6 +1405,11 @@ static struct kfd_process *create_process(const struct task_struct *thread) hash_add_rcu(kfd_processes_table, &process->kfd_processes, (uintptr_t)process->mm); + /* Avoid free_notifier to start kfd_process_wq_release if + * mmu_notifier_get failed because of pending signal. + */ + kref_get(&process->ref); + /* MMU notifier registration must be the last call that can fail * because after this point we cannot unwind the process creation. 
* After this point, mmu_notifier_put will trigger the cleanup by @@ -1417,6 +1422,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) } BUG_ON(mn != &process->mmu_notifier); + kfd_unref_process(process); get_task_struct(process->lead_thread); return process; From 74097f9fd2f5ebdae04fcba59da345386415cbf3 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 7 Jul 2022 16:54:19 -0400 Subject: [PATCH 094/134] drm/amdkfd: Process notifier release callback don't take mutex Move process queues cleanup to deferred work kfd_process_wq_release, to avoid potential deadlock circular locking warning: WARNING: possible circular locking dependency detected the existing dependency chain (in reverse order) is: -> #2 ((work_completion)(&svms->deferred_list_work)){+.+.}-{0:0}: __flush_work+0x343/0x4a0 svm_range_list_lock_and_flush_work+0x39/0xc0 svm_range_set_attr+0xe8/0x1080 [amdgpu] kfd_ioctl+0x19b/0x600 [amdgpu] __x64_sys_ioctl+0x81/0xb0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #1 (&info->lock#2){+.+.}-{3:3}: __mutex_lock+0xa4/0x940 amdgpu_amdkfd_gpuvm_acquire_process_vm+0x2e3/0x590 kfd_process_device_init_vm+0x61/0x200 [amdgpu] kfd_ioctl_acquire_vm+0x83/0xb0 [amdgpu] kfd_ioctl+0x19b/0x600 [amdgpu] __x64_sys_ioctl+0x81/0xb0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #0 (&process->mutex){+.+.}-{3:3}: __lock_acquire+0x1365/0x23d0 lock_acquire+0xc9/0x2e0 __mutex_lock+0xa4/0x940 kfd_process_notifier_release+0x96/0xe0 [amdgpu] __mmu_notifier_release+0x94/0x210 exit_mmap+0x35/0x1f0 mmput+0x63/0x120 svm_range_deferred_list_work+0x177/0x2c0 [amdgpu] process_one_work+0x2a4/0x600 worker_thread+0x39/0x3e0 kthread+0x16d/0x1a0 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((work_completion)(&svms->deferred_list_work)); lock(&info->lock#2); lock((work_completion)(&svms->deferred_list_work)); lock(&process->mutex); Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d8591721270b2..6c83a519b3a1b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1115,6 +1115,15 @@ static void kfd_process_wq_release(struct work_struct *work) struct kfd_process *p = container_of(work, struct kfd_process, release_work); + kfd_process_dequeue_from_all_devices(p); + pqm_uninit(&p->pqm); + + /* Signal the eviction fence after user mode queues are + * destroyed. This allows any BOs to be freed without + * triggering pointless evictions or waiting for fences. + */ + dma_fence_signal(p->ef); + kfd_process_remove_sysfs(p); kfd_iommu_unbind_process(p); @@ -1179,20 +1188,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - mutex_lock(&p->mutex); - - kfd_process_dequeue_from_all_devices(p); - pqm_uninit(&p->pqm); - /* Indicate to other users that MM is no longer valid */ p->mm = NULL; - /* Signal the eviction fence after user mode queues are - * destroyed. This allows any BOs to be freed without - * triggering pointless evictions or waiting for fences. 
- */ - dma_fence_signal(p->ef); - - mutex_unlock(&p->mutex); mmu_notifier_put(&p->mmu_notifier); } From 7acc487ab57e076c823b2b7559aa9e3997962ca2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Jul 2022 16:00:56 -0400 Subject: [PATCH 095/134] drm/amd/display: reduce stack size in dcn32 dml (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move additional dummy structures off the stack and into the dummy vars structure. Fixes the following: drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In function 'DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation': drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1659:1: error: the frame size of 2144 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] 1659 | } | ^ drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In function 'dml32_ModeSupportAndSystemConfigurationFull': drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:3799:1: error: the frame size of 2464 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] 3799 | } // ModeSupportAndSystemConfigurationFull | ^ v2: move more stuff to dummy structure, fix init order (Alex) Acked-by: Christian König Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: Stephen Rothwell --- .../dc/dml/dcn32/display_mode_vba_32.c | 411 ++++++++---------- .../drm/amd/display/dc/dml/display_mode_vba.h | 36 ++ 2 files changed, 217 insertions(+), 230 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index e9204c711cb96..9c2003fbe8fa4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -65,6 +65,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman int iteration; double MaxTotalRDBandwidth; unsigned int NextPrefetchMode; + double MaxTotalRDBandwidthNoUrgentBurst = 0.0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThanMax = false; + double TWait; + double TotalWRBandwidth = 0; + double WRBandwidth = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: --- START ---\n", __func__); @@ -710,11 +716,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman NextPrefetchMode = mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]; do { - double MaxTotalRDBandwidthNoUrgentBurst = 0.0; - bool DestinationLineTimesForPrefetchLessThan2 = false; - bool VRatioPrefetchMoreThanMax = false; - double dummy_unit_vector[DC__NUM_DPP__MAX]; - MaxTotalRDBandwidth = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines); @@ -723,41 +724,39 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* NOTE PerfetchMode variable is invalid in DAL as per the input received. * Hence the direction is to use PrefetchModePerState. 
*/ - double TWait = dml32_CalculateTWait( - mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], - mode_lib->vba.UsesMALLForPStateChange[k], - mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - mode_lib->vba.DRRDisplay[k], - mode_lib->vba.DRAMClockChangeLatency, - mode_lib->vba.FCLKChangeLatency, v->UrgentLatency, - mode_lib->vba.SREnterPlusExitTime); - - DmlPipe myPipe; - - myPipe.Dppclk = mode_lib->vba.DPPCLK[k]; - myPipe.Dispclk = mode_lib->vba.DISPCLK; - myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep; - myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; - myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; - myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; - myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; - myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; - myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; - myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; - myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; - myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; - myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; - myPipe.HTotal = mode_lib->vba.HTotal[k]; - myPipe.HActive = mode_lib->vba.HActive[k]; - myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; - myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k]; - myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; - myPipe.BytePerPixelY = v->BytePerPixelY[k]; - myPipe.BytePerPixelC = v->BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; + TWait = dml32_CalculateTWait( + mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.UsesMALLForPStateChange[k], + mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->vba.DRRDisplay[k], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.FCLKChangeLatency, v->UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dppclk = mode_lib->vba.DPPCLK[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dispclk = mode_lib->vba.DISPCLK; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; + 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, - &myPipe, v->DSCDelay[k], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, @@ -906,8 +905,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #endif { - double dummy_single[1]; - dml32_CalculatePrefetchBandwithSupport( mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, @@ -931,16 +928,14 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* output */ &MaxTotalRDBandwidth, - &dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], &v->PrefetchModeSupported); } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) - dummy_unit_vector[k] = 1.0; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector[k] = 1.0; { - double dummy_single[1]; - bool dummy_boolean[1]; dml32_CalculatePrefetchBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, v->NoUrgentLatencyHidingPre, @@ -954,17 +949,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw_pre, v->prefetch_vmrow_bw, mode_lib->vba.DPPPerPlane, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, + 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, /* output */ - &dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], &v->FractionOfUrgentBandwidth, - &dummy_boolean[0]); + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); } if (VRatioPrefetchMoreThanMax != false || DestinationLineTimesForPrefetchLessThan2 != false) { @@ -1047,8 +1042,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman } { - double dummy_single[2]; - bool dummy_boolean[1]; dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, mode_lib->vba.ImmediateFlipRequirement, @@ -1072,7 +1065,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* output */ &v->total_dcn_read_bw_with_flip, // Single *TotalBandwidth - &dummy_single[0], // Single *FractionOfUrgentBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], // Single *FractionOfUrgentBandwidth &v->ImmediateFlipSupported); // Boolean *ImmediateFlipBandwidthSupport dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, @@ -1089,17 +1082,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw_pre, v->prefetch_vmrow_bw, mode_lib->vba.DPPPerPlane, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, /* output */ - &dummy_single[1], // Single *TotalBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], // Single *TotalBandwidth &v->FractionOfUrgentBandwidthImmediateFlip, // Single *FractionOfUrgentBandwidth - &dummy_boolean[0]); // Boolean *ImmediateFlipBandwidthSupport + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); // Boolean *ImmediateFlipBandwidthSupport } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { @@ -1157,22 +1150,17 @@ static 
void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman //Watermarks and NB P-State/DRAM Clock Change Support { - SOCParametersList mmSOCParameters; - enum clock_change_support dummy_dramchange_support; - enum dm_fclock_change_support dummy_fclkchange_support; - bool dummy_USRRetrainingSupport; - - mmSOCParameters.UrgentLatency = v->UrgentLatency; - mmSOCParameters.ExtraLatency = v->UrgentExtraLatency; - mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; - mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; - mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; - mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; - mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; - mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; - mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; - mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; - mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.UrgentLatency = v->UrgentLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.ExtraLatency = v->UrgentExtraLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( mode_lib->vba.USRRetrainingRequiredFinal, @@ -1190,7 +1178,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dpte_group_bytes, v->meta_row_height, v->meta_row_height_chroma, - mmSOCParameters, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters, mode_lib->vba.WritebackChunkSize, mode_lib->vba.SOCCLK, v->DCFCLKDeepSleep, @@ -1227,12 +1215,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* Output */ &v->Watermark, - &dummy_dramchange_support, + 
&v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support, v->MaxActiveDRAMClockChangeLatencySupported, v->SubViewportLinesNeededInMALL, - &dummy_fclkchange_support, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_fclkchange_support, &v->MinActiveFCLKChangeLatencySupported, - &dummy_USRRetrainingSupport, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_USRRetrainingSupport, mode_lib->vba.ActiveDRAMClockChangeLatencyMargin); /* DCN32 has a new struct Watermarks (typedef) which is used to store @@ -1494,9 +1482,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { //Maximum Bandwidth Used - double TotalWRBandwidth = 0; - double WRBandwidth = 0; - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.WritebackEnable[k] == true && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { @@ -1590,9 +1575,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #ifdef __DML_VBA_ALLOW_DELTA__ { - double dummy_single[2]; unsigned int dummy_integer[1]; - bool dummy_boolean[1]; // Calculate z8 stutter eff assuming 0 reserved space dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte, @@ -1645,14 +1628,14 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->meta_row_bw, v->dpte_row_bw, /* Output */ - &dummy_single[0], - &dummy_single[1], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], &dummy_integer[0], &v->Z8StutterEfficiencyNotIncludingVBlankBestCase, &v->Z8StutterEfficiencyBestCase, &v->Z8NumberOfStutterBurstsPerFrameBestCase, &v->StutterPeriodBestCase, - &dummy_boolean[0]); + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); } #else v->Z8StutterEfficiencyNotIncludingVBlankBestCase = v->Z8StutterEfficiencyNotIncludingVBlank; @@ -1668,32 +1651,18 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { - unsigned int dummy_integer[4]; - bool dummy_boolean[2]; - bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; - bool MPCCombineMethodAsPossible; - enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; - unsigned int TotalNumberOfActiveOTG; - unsigned int TotalNumberOfActiveHDMIFRL; - unsigned int TotalNumberOfActiveDP2p0; - unsigned int TotalNumberOfActiveDP2p0Outputs; - unsigned int TotalDSCUnitsRequired; - unsigned int m; - unsigned int ReorderingBytes; - bool FullFrameMALLPStateMethod; - bool SubViewportMALLPStateMethod; - bool PhantomPipeMALLPStateMethod; + struct vba_vars_st *v = &mode_lib->vba; + int i, j; + unsigned int k, m; unsigned int MaximumMPCCombine; + unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth; + unsigned int TotalSlots; bool CompBufReservedSpaceNeedAdjustment; bool CompBufReservedSpaceNeedAdjustmentSingleDPP; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: called\n", __func__); #endif - struct vba_vars_st *v = &mode_lib->vba; - - int i, j; - unsigned int k; /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ @@ -1945,7 +1914,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l 
mode_lib->vba.Read256BlockHeightC, mode_lib->vba.Read256BlockWidthY, mode_lib->vba.Read256BlockWidthC, - dummy_odm_mode, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_odm_mode, mode_lib->vba.BlendingAndTiming, mode_lib->vba.BytePerPixelY, mode_lib->vba.BytePerPixelC, @@ -1973,35 +1942,26 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool *ViewportSizeSupport */ - MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; - MPCCombineMethodAsPossible = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_reduce_voltage_and_clocks) - MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_always_when_possible) - MPCCombineMethodAsPossible = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = true; } - mode_lib->vba.MPCCombineMethodIncompatible = MPCCombineMethodAsNeededForPStateChangeAndVoltage - && MPCCombineMethodAsPossible; + mode_lib->vba.MPCCombineMethodIncompatible = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible; for (i = 0; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { - bool NoChroma; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = 0; mode_lib->vba.TotalAvailablePipesSupport[i][j] = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC = dm_odm_combine_mode_disabled; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC = dm_odm_combine_mode_disabled; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { - - bool TotalAvailablePipesSupportNoDSC; - unsigned int NumberOfDPPNoDSC; - enum odm_combine_mode ODMModeNoDSC = dm_odm_combine_mode_disabled; - double RequiredDISPCLKPerSurfaceNoDSC; - bool TotalAvailablePipesSupportDSC; - unsigned int NumberOfDPPDSC; - enum odm_combine_mode ODMModeDSC = dm_odm_combine_mode_disabled; - double RequiredDISPCLKPerSurfaceDSC; - dml32_CalculateODMMode( mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, mode_lib->vba.HActive[k], @@ -2018,10 +1978,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DISPCLKDPPCLKVCOSpeed, /* Output */ - &TotalAvailablePipesSupportNoDSC, - &NumberOfDPPNoDSC, - &ODMModeNoDSC, - &RequiredDISPCLKPerSurfaceNoDSC); + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC); dml32_CalculateODMMode( mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, @@ -2039,10 +1999,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DISPCLKDPPCLKVCOSpeed, /* Output 
*/ - &TotalAvailablePipesSupportDSC, - &NumberOfDPPDSC, - &ODMModeDSC, - &RequiredDISPCLKPerSurfaceDSC); + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC); dml32_CalculateOutputLink( mode_lib->vba.PHYCLKPerState[i], @@ -2060,8 +2020,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.AudioSampleRate[k], mode_lib->vba.AudioSampleLayout[k], - ODMModeNoDSC, - ODMModeDSC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, mode_lib->vba.DSCEnable[k], mode_lib->vba.OutputLinkDPLanes[k], mode_lib->vba.OutputLinkDPRate[k], @@ -2075,21 +2035,21 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.RequiredSlots[i][k]); if (mode_lib->vba.RequiresDSC[i][k] == false) { - mode_lib->vba.ODMCombineEnablePerState[i][k] = ODMModeNoDSC; + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC; mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = - RequiredDISPCLKPerSurfaceNoDSC; - if (!TotalAvailablePipesSupportNoDSC) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC) mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = - mode_lib->vba.TotalNumberOfActiveDPP[i][j] + NumberOfDPPNoDSC; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC; } else { - mode_lib->vba.ODMCombineEnablePerState[i][k] = ODMModeDSC; + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC; mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = - RequiredDISPCLKPerSurfaceDSC; - if (!TotalAvailablePipesSupportDSC) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC) mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = - mode_lib->vba.TotalNumberOfActiveDPP[i][j] + NumberOfDPPDSC; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC; } } @@ -2124,7 +2084,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] = 0; - NoChroma = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = true; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.NoOfDPP[i][j][k] == 1) @@ -2134,7 +2094,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l || mode_lib->vba.SourcePixelFormat[k] == dm_420_10 || mode_lib->vba.SourcePixelFormat[k] == dm_420_12 || mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) { - NoChroma = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = false; } } @@ -2145,15 +2105,15 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_l if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting, - mode_lib->vba.TotalNumberOfActiveDPP[i][j], NoChroma, + mode_lib->vba.TotalNumberOfActiveDPP[i][j], v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma, mode_lib->vba.Output[0], mode_lib->vba.SurfaceTiling[0], CompBufReservedSpaceNeedAdjustment, mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) { while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP || mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) { - double BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; - unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; + NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.MPCCombineUse[k] @@ -2161,13 +2121,13 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MPCCombineUse[k] != dm_mpc_reduce_voltage && mode_lib->vba.ReadBandwidthLuma[k] + mode_lib->vba.ReadBandwidthChroma[k] > - BWOfNonCombinedSurfaceOfMaximumBandwidth && + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth && (mode_lib->vba.ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_2to1 && mode_lib->vba.ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) && mode_lib->vba.MPCCombine[i][j][k] == false) { - BWOfNonCombinedSurfaceOfMaximumBandwidth = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->vba.ReadBandwidthLuma[k] + mode_lib->vba.ReadBandwidthChroma[k]; NumberOfNonCombinedSurfaceOfMaximumBandwidth = k; @@ -2233,28 +2193,28 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } // i (VOLTAGE_STATE) /* Total Available OTG, HDMIFRL, DP Support Check */ - TotalNumberOfActiveOTG = 0; - TotalNumberOfActiveHDMIFRL = 0; - TotalNumberOfActiveDP2p0 = 0; - TotalNumberOfActiveDP2p0Outputs = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = 0; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.BlendingAndTiming[k] == k) { - TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG + 1; if (mode_lib->vba.Output[k] == dm_dp2p0) { - TotalNumberOfActiveDP2p0 = TotalNumberOfActiveDP2p0 + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 + 1; if (mode_lib->vba.OutputMultistreamId[k] == k || mode_lib->vba.OutputMultistreamEn[k] == false) { - TotalNumberOfActiveDP2p0Outputs = TotalNumberOfActiveDP2p0Outputs + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs + 1; } } } } - mode_lib->vba.NumberOfOTGSupport = (TotalNumberOfActiveOTG <= 
mode_lib->vba.MaxNumOTG); - mode_lib->vba.NumberOfHDMIFRLSupport = (TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs); - mode_lib->vba.NumberOfDP2p0Support = (TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams - && TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs); + mode_lib->vba.NumberOfOTGSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG); + mode_lib->vba.NumberOfHDMIFRLSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs); + mode_lib->vba.NumberOfDP2p0Support = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs); /* Display IO and DSC Support Check */ mode_lib->vba.NonsupportedDSCInputBPC = false; @@ -2269,8 +2229,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (i = 0; i < v->soc.num_states; ++i) { - unsigned int TotalSlots; - mode_lib->vba.ExceededMultistreamSlots[i] = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k) { @@ -2441,12 +2399,12 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /* Check DSC Unit and Slices Support */ - TotalDSCUnitsRequired = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; for (i = 0; i < v->soc.num_states; ++i) { mode_lib->vba.NotEnoughDSCUnits[i] = false; mode_lib->vba.NotEnoughDSCSlices[i] = false; - TotalDSCUnitsRequired = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = true; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.RequiresDSC[i][k] == true) { @@ -2454,33 +2412,31 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.HActive[k] > 4 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 4; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 4; if (mode_lib->vba.NumberOfDSCSlices[k] > 16) mode_lib->vba.NotEnoughDSCSlices[i] = true; } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.HActive[k] > 2 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 2; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 2; if (mode_lib->vba.NumberOfDSCSlices[k] > 8) mode_lib->vba.NotEnoughDSCSlices[i] = true; } else { if (mode_lib->vba.HActive[k] > mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 1; if 
(mode_lib->vba.NumberOfDSCSlices[k] > 4) mode_lib->vba.NotEnoughDSCSlices[i] = true; } } } - if (TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) + if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) mode_lib->vba.NotEnoughDSCUnits[i] = true; } /*DSC Delay per state*/ for (i = 0; i < v->soc.num_states; ++i) { - unsigned int m; - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { mode_lib->vba.DSCDelayPerState[i][k] = dml32_DSCDelayRequirement( mode_lib->vba.RequiresDSC[i][k], mode_lib->vba.ODMCombineEnablePerState[i][k], @@ -2576,8 +2532,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DETBufferSizeCThisState, &mode_lib->vba.UnboundedRequestEnabledThisState, &mode_lib->vba.CompressedBufferSizeInkByteThisState, - &dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ - &dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], &mode_lib->vba.ViewportSizeSupport[i][j]); @@ -2942,7 +2898,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - ReorderingBytes = mode_lib->vba.NumberOfChannels + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes = mode_lib->vba.NumberOfChannels * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); @@ -2998,20 +2954,20 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)); } - FullFrameMALLPStateMethod = false; - SubViewportMALLPStateMethod = false; - PhantomPipeMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) - FullFrameMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = true; if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) - SubViewportMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = true; if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) - PhantomPipeMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = true; } - mode_lib->vba.InvalidCombinationOfMALLUseForPState = (SubViewportMALLPStateMethod - != PhantomPipeMALLPStateMethod) || (SubViewportMALLPStateMethod && FullFrameMALLPStateMethod); + mode_lib->vba.InvalidCombinationOfMALLUseForPState = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod + != v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod) || 
(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod); if (mode_lib->vba.UseMinimumRequiredDCFCLK == true) { dml32_UseMinimumDCFCLK( @@ -3025,7 +2981,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.SREnterPlusExitTime, mode_lib->vba.ReturnBusWidth, mode_lib->vba.RoundTripPingLatencyCycles, - ReorderingBytes, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, mode_lib->vba.PixelChunkSizeInKByte, mode_lib->vba.MetaChunkSize, mode_lib->vba.GPUVMEnable, @@ -3088,7 +3044,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l / mode_lib->vba.ReturnBWPerState[i][j] > (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLKState[i][j] - + ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) { + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) { mode_lib->vba.ROBSupport[i][j] = true; } else { mode_lib->vba.ROBSupport[i][j] = false; @@ -3130,9 +3086,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (i = 0; i < (int) v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { - double VMDataOnlyReturnBWPerState; - double HostVMInefficiencyFactor; - unsigned int NextPrefetchModeState; mode_lib->vba.TimeCalc = 24 / mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; @@ -3172,37 +3125,35 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentBurstFactorChroma, mode_lib->vba.UrgentBurstFactorCursor); - VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]); - HostVMInefficiencyFactor = 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = 1; if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable) - HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j] - / VMDataOnlyReturnBWPerState; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j] + / v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState; mode_lib->vba.ExtraLatency = dml32_CalculateExtraLatency( - mode_lib->vba.RoundTripPingLatencyCycles, ReorderingBytes, + mode_lib->vba.RoundTripPingLatencyCycles, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.TotalNumberOfActiveDPP[i][j], mode_lib->vba.PixelChunkSizeInKByte, mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize, mode_lib->vba.ReturnBWPerState[i][j], mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.NoOfDPPThisState, mode_lib->vba.dpte_group_bytes, - HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize, mode_lib->vba.HostVMMaxNonCachedPageTableLevels); - NextPrefetchModeState = mode_lib->vba.MinPrefetchMode; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = 
mode_lib->vba.MinPrefetchMode; mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; do { - mode_lib->vba.PrefetchModePerState[i][j] = NextPrefetchModeState; + mode_lib->vba.PrefetchModePerState[i][j] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState; mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { - DmlPipe myPipe; - mode_lib->vba.TWait = dml32_CalculateTWait( mode_lib->vba.PrefetchModePerState[i][j], mode_lib->vba.UsesMALLForPStateChange[k], @@ -3212,34 +3163,34 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.FCLKChangeLatency, mode_lib->vba.UrgLatency[i], mode_lib->vba.SREnterPlusExitTime); - myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k]; - myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j]; - myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; - myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; - myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; - myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; - myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; - myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; - myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; - myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; - myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; - myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; - myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; - myPipe.HTotal = mode_lib->vba.HTotal[k]; - myPipe.HActive = mode_lib->vba.HActive[k]; - myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; - myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k]; - myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; - myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; - myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + 
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; mode_lib->vba.NoTimeForPrefetch[i][j][k] = dml32_CalculatePrefetchSchedule( - HostVMInefficiencyFactor, - &myPipe, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, @@ -3298,7 +3249,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup - &dummy_integer[0], // unsigned int *VUpdateOffsetPix + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // unsigned int *VUpdateOffsetPix &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[3], // unsigned int *VUpdateWidthPix &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[4]); // unsigned int *VReadyOffsetPix } @@ -3427,7 +3378,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { - dml32_CalculateFlipSchedule(HostVMInefficiencyFactor, + dml32_CalculateFlipSchedule(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.ExtraLatency, mode_lib->vba.UrgLatency[i], mode_lib->vba.GPUVMMaxPageTableLevels, @@ -3501,7 +3452,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; - NextPrefetchModeState = NextPrefetchModeState + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState + 1; } else { mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; } @@ -3515,7 +3466,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && !mode_lib->vba.ImmediateFlipRequiredFinal) || 
mode_lib->vba.ImmediateFlipSupportedForState[i][j] == true)) || (mode_lib->vba.NextMaxVStartup == mode_lib->vba.MaxMaxVStartup[i][j] - && NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode))); + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode))); for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { mode_lib->vba.use_one_row_for_frame_this_state[k] = @@ -3591,7 +3542,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.Watermark, // Store the values in vba &mode_lib->vba.DRAMClockChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // double *MaxActiveDRAMClockChangeLatencySupported - &dummy_integer[0], // Long SubViewportLinesNeededInMALL[] + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[] &mode_lib->vba.FCLKChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported &mode_lib->vba.USRRetrainingSupport[i][j], diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 6e61b53823619..492aec634b685 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -196,6 +196,13 @@ struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCal unsigned int ReorderBytes; unsigned int VMDataOnlyReturnBW; double HostVMInefficiencyFactor; + DmlPipe myPipe; + SOCParametersList mmSOCParameters; + double dummy_unit_vector[DC__NUM_DPP__MAX]; + double dummy_single[2]; + enum clock_change_support dummy_dramchange_support; + enum dm_fclock_change_support dummy_fclkchange_support; + bool dummy_USRRetrainingSupport; }; struct dml32_ModeSupportAndSystemConfigurationFull { @@ -211,6 +218,35 @@ struct dml32_ModeSupportAndSystemConfigurationFull { double DSTXAfterScaler[DC__NUM_DPP__MAX]; double MaxTotalVActiveRDBandwidth; bool dummy_boolean_array[2][DC__NUM_DPP__MAX]; + enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; + DmlPipe myPipe; + unsigned int dummy_integer[4]; + unsigned int TotalNumberOfActiveOTG; + unsigned int TotalNumberOfActiveHDMIFRL; + unsigned int TotalNumberOfActiveDP2p0; + unsigned int TotalNumberOfActiveDP2p0Outputs; + unsigned int TotalDSCUnitsRequired; + unsigned int ReorderingBytes; + unsigned int TotalSlots; + unsigned int NumberOfDPPDSC; + unsigned int NumberOfDPPNoDSC; + unsigned int NextPrefetchModeState; + bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; + bool MPCCombineMethodAsPossible; + bool FullFrameMALLPStateMethod; + bool SubViewportMALLPStateMethod; + bool PhantomPipeMALLPStateMethod; + bool NoChroma; + bool TotalAvailablePipesSupportNoDSC; + bool TotalAvailablePipesSupportDSC; + enum odm_combine_mode ODMModeNoDSC; + enum odm_combine_mode ODMModeDSC; + double RequiredDISPCLKPerSurfaceNoDSC; + double RequiredDISPCLKPerSurfaceDSC; + double BWOfNonCombinedSurfaceOfMaximumBandwidth; + double VMDataOnlyReturnBWPerState; + double HostVMInefficiencyFactor; + bool dummy_boolean[2]; }; struct dummy_vars { From bac4b41d917a1d999308bb1e779f8c3b39c19f67 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Jul 2022 23:06:49 -0400 Subject: [PATCH 096/134] drm/amd/display: reduce stack for dml32_CalculateSwathAndDETConfiguration Move stack variables to dummy structure. 
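This and the other stack-reduction patches in this series all apply the
same transformation: arrays and structs that were function-local (and
therefore on the kernel stack) in these very large DML functions are
moved into a per-function scratch struct hanging off struct dummy_vars,
and the function takes a pointer to that scratch area instead. Since
the DML calculations do not run concurrently on the same mode_lib
instance, a single scratch area can be reused safely across calls.

A minimal, self-contained sketch of the pattern (hypothetical names,
not the actual DML declarations):

	#define MAX_PIPES 6

	/* Scratch space owned by a long-lived context, not the stack. */
	struct calc_scratch {
		unsigned int max_swath_height_y[MAX_PIPES];
		unsigned int max_swath_height_c[MAX_PIPES];
	};

	/*
	 * Before the conversion these arrays were locals, so every call
	 * paid for them in stack; now the caller passes the scratch area.
	 */
	static void calc_swath(struct calc_scratch *st_vars, unsigned int n)
	{
		unsigned int k;

		for (k = 0; k < n && k < MAX_PIPES; ++k) {
			st_vars->max_swath_height_y[k] = 2 * k;
			st_vars->max_swath_height_c[k] = k;
		}
	}

The trade-off is that the scratch structs live permanently in the
heap-allocated mode_lib rather than transiently on the stack, which is
acceptable here and avoids frame-size warnings from the large
DC__NUM_DPP__MAX-sized locals.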
Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: Stephen Rothwell --- .../dc/dml/dcn32/display_mode_vba_32.c | 3 + .../dc/dml/dcn32/display_mode_vba_util_32.c | 121 +++++++++--------- .../dc/dml/dcn32/display_mode_vba_util_32.h | 2 + .../drm/amd/display/dc/dml/display_mode_vba.h | 15 +++ 4 files changed, 77 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 9c2003fbe8fa4..c0e537731c1f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -221,6 +221,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman // VBA_DELTA // Calculate DET size, swath height dml32_CalculateSwathAndDETConfiguration( + &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, @@ -1878,6 +1879,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } dml32_CalculateSwathAndDETConfiguration( + &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, @@ -2474,6 +2476,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } dml32_CalculateSwathAndDETConfiguration( + &v->dummy_vars.dml32_CalculateSwathAndDETConfiguration, mode_lib->vba.DETSizeOverride, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.ConfigReturnBufferSizeInKByte, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index c8a3f367d6229..da5befd7fdec9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -391,6 +391,7 @@ void dml32_CalculateBytePerPixelAndBlockSizes( } // CalculateBytePerPixelAndBlockSizes void dml32_CalculateSwathAndDETConfiguration( + struct dml32_CalculateSwathAndDETConfiguration *st_vars, unsigned int DETSizeOverride[], enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int ConfigReturnBufferSizeInKByte, @@ -455,18 +456,10 @@ void dml32_CalculateSwathAndDETConfiguration( bool ViewportSizeSupportPerSurface[], bool *ViewportSizeSupport) { - unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; - unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; - unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; - unsigned int RoundedUpSwathSizeBytesY; - unsigned int RoundedUpSwathSizeBytesC; - double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; - double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; unsigned int k; - unsigned int TotalActiveDPP = 0; - bool NoChromaSurfaces = true; - unsigned int DETBufferSizeInKByteForSwathCalculation; + + st_vars->TotalActiveDPP = 0; + st_vars->NoChromaSurfaces = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); @@ -501,43 +494,43 @@ void dml32_CalculateSwathAndDETConfiguration( DPPPerSurface, /* Output */ - SwathWidthdoubleDPP, - SwathWidthdoubleDPPChroma, + st_vars->SwathWidthdoubleDPP, + st_vars->SwathWidthdoubleDPPChroma, SwathWidth, SwathWidthChroma, - MaximumSwathHeightY, - MaximumSwathHeightC, + st_vars->MaximumSwathHeightY, + 
st_vars->MaximumSwathHeightC, swath_width_luma_ub, swath_width_chroma_ub); for (k = 0; k < NumberOfActiveSurfaces; ++k) { - RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; - RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; + st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k]; + st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k]; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); - dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, - RoundedUpMaxSwathSizeBytesY[k]); + st_vars->RoundedUpMaxSwathSizeBytesY[k]); dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); - dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); + dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, - RoundedUpMaxSwathSizeBytesC[k]); + st_vars->RoundedUpMaxSwathSizeBytesC[k]); #endif if (SourcePixelFormat[k] == dm_420_10) { - RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); - RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); + st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256); + st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256); } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { - TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); + st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { - NoChromaSurfaces = false; + st_vars->NoChromaSurfaces = false; } } @@ -547,10 +540,10 @@ void dml32_CalculateSwathAndDETConfiguration( // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data // - assume worst-case compression rate of 4. 
[ROB size - 8 * swath_size / max_compression ratio] // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req - *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); + *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512); if (*CompBufReservedSpaceNeedAdjustment == 1) { - *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; + *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512; } #ifdef __DML_VBA_DEBUG__ @@ -558,7 +551,7 @@ void dml32_CalculateSwathAndDETConfiguration( dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); #endif - *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); + *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); dml32_CalculateDETBufferSize(DETSizeOverride, UseMALLForPStateChange, @@ -573,8 +566,8 @@ void dml32_CalculateSwathAndDETConfiguration( SourcePixelFormat, ReadBandwidthLuma, ReadBandwidthChroma, - RoundedUpMaxSwathSizeBytesY, - RoundedUpMaxSwathSizeBytesC, + st_vars->RoundedUpMaxSwathSizeBytesY, + st_vars->RoundedUpMaxSwathSizeBytesC, DPPPerSurface, /* Output */ @@ -582,7 +575,7 @@ void dml32_CalculateSwathAndDETConfiguration( CompressedBufferSizeInkByte); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); + dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP); dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); @@ -593,42 +586,42 @@ void dml32_CalculateSwathAndDETConfiguration( *ViewportSizeSupport = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { - DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == + st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe ? 
1024 : DETBufferSizeInKByte[k]); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, - DETBufferSizeInKByteForSwathCalculation); + st_vars->DETBufferSizeInKByteForSwathCalculation); #endif - if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k]; - SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; - } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && - RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - SwathHeightC[k] = MaximumSwathHeightC[k]; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; - } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && - RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= - DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { - SwathHeightY[k] = MaximumSwathHeightY[k]; - SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; + if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; + } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && + st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; + } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && + st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <= + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; } else { - SwathHeightY[k] = MaximumSwathHeightY[k] / 2; - SwathHeightC[k] = MaximumSwathHeightC[k] / 2; - RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; - RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; + SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; + SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; + st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; + st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; } - if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > - 
DETBufferSizeInKByteForSwathCalculation * 1024 / 2) + if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 > + st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { *ViewportSizeSupport = false; @@ -643,7 +636,7 @@ void dml32_CalculateSwathAndDETConfiguration( #endif DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; DETBufferSizeC[k] = 0; - } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { + } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) { #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); #endif @@ -661,11 +654,11 @@ void dml32_CalculateSwathAndDETConfiguration( dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, - k, RoundedUpMaxSwathSizeBytesY[k]); + k, st_vars->RoundedUpMaxSwathSizeBytesY[k]); dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, - k, RoundedUpMaxSwathSizeBytesC[k]); - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); - dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); + k, st_vars->RoundedUpMaxSwathSizeBytesC[k]); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY); + dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC); dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index d293856ba906b..158cfa2af1af3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -30,6 +30,7 @@ #include "os_types.h" #include "../dc_features.h" #include "../display_mode_structs.h" +#include "dml/display_mode_vba.h" unsigned int dml32_dscceComputeDelay( unsigned int bpc, @@ -81,6 +82,7 @@ void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( double *DPPCLKUsingSingleDPP); void dml32_CalculateSwathAndDETConfiguration( + struct dml32_CalculateSwathAndDETConfiguration *st_vars, unsigned int DETSizeOverride[], enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int ConfigReturnBufferSizeInKByte, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 492aec634b685..6cf814c2cc8de 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -182,6 +182,20 @@ void Calculate256BBlockSizes( unsigned int *BlockWidth256BytesY, unsigned int *BlockWidth256BytesC); +struct dml32_CalculateSwathAndDETConfiguration { + unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; + unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; + unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; + unsigned int 
RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; + unsigned int RoundedUpSwathSizeBytesY; + unsigned int RoundedUpSwathSizeBytesC; + double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; + double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; + unsigned int TotalActiveDPP; + bool NoChromaSurfaces; + unsigned int DETBufferSizeInKByteForSwathCalculation; +}; + struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation { unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX]; double dummy_single_array[2][DC__NUM_DPP__MAX]; @@ -253,6 +267,7 @@ struct dummy_vars { struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation; struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull; + struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration; }; struct vba_vars_st { From c3b3f9ba25e6cbe59673505fbc5fff6c4cda0ef7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 20 Jul 2022 23:25:25 -0400 Subject: [PATCH 097/134] drm/amd/display: reduce stack for dml32_CalculateVMRowAndSwath Move stack variables to dummy structure. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: Stephen Rothwell --- .../dc/dml/dcn32/display_mode_vba_32.c | 2 + .../dc/dml/dcn32/display_mode_vba_util_32.c | 110 ++++++++---------- .../dc/dml/dcn32/display_mode_vba_util_32.h | 1 + .../drm/amd/display/dc/dml/display_mode_vba.h | 19 +++ 4 files changed, 70 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index c0e537731c1f8..0ecc9e4c52a68 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -461,6 +461,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { dml32_CalculateVMRowAndSwath( + &v->dummy_vars.dml32_CalculateVMRowAndSwath, mode_lib->vba.NumberOfActiveSurfaces, v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.SurfaceParameters, v->SurfaceSizeInMALL, @@ -2676,6 +2677,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { dml32_CalculateVMRowAndSwath( + &v->dummy_vars.dml32_CalculateVMRowAndSwath, mode_lib->vba.NumberOfActiveSurfaces, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SurfParameters, mode_lib->vba.SurfaceSizeInMALL, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index da5befd7fdec9..9ebd3207ce42b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -1867,6 +1867,7 @@ void dml32_CalculateSurfaceSizeInMall( } // CalculateSurfaceSizeInMall void dml32_CalculateVMRowAndSwath( + struct dml32_CalculateVMRowAndSwath *st_vars, unsigned int NumberOfActiveSurfaces, DmlPipe myPipe[], unsigned int SurfaceSizeInMALL[], @@ -1932,21 +1933,6 @@ void dml32_CalculateVMRowAndSwath( unsigned int BIGK_FRAGMENT_SIZE[]) { unsigned int k; - unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; - unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; - unsigned int PDEAndMetaPTEBytesFrameY; - unsigned int PDEAndMetaPTEBytesFrameC; - unsigned int 
MetaRowByteY[DC__NUM_DPP__MAX]; - unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; - unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; - bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (HostVMEnable == true) { @@ -1968,15 +1954,15 @@ void dml32_CalculateVMRowAndSwath( myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && !IsVertical(myPipe[k].SourceRotation)) { - PTEBufferSizeInRequestsForLuma[k] = + st_vars->PTEBufferSizeInRequestsForLuma[k] = (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; - PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; + st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k]; } else { - PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; - PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; + st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; + st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; } - PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( + st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( myPipe[k].ViewportStationary, myPipe[k].DCCEnable, myPipe[k].DPPPerSurface, @@ -1996,21 +1982,21 @@ void dml32_CalculateVMRowAndSwath( GPUVMMaxPageTableLevels, GPUVMMinPageSizeKBytes[k], HostVMMinPageSize, - PTEBufferSizeInRequestsForChroma[k], + st_vars->PTEBufferSizeInRequestsForChroma[k], myPipe[k].PitchC, myPipe[k].DCCMetaPitchC, myPipe[k].BlockWidthC, myPipe[k].BlockHeightC, /* Output */ - &MetaRowByteC[k], - &PixelPTEBytesPerRowC[k], + &st_vars->MetaRowByteC[k], + &st_vars->PixelPTEBytesPerRowC[k], &dpte_row_width_chroma_ub[k], &dpte_row_height_chroma[k], &dpte_row_height_linear_chroma[k], - &PixelPTEBytesPerRowC_one_row_per_frame[k], - &dpte_row_width_chroma_ub_one_row_per_frame[k], - &dpte_row_height_chroma_one_row_per_frame[k], + &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k], + &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k], + &st_vars->dpte_row_height_chroma_one_row_per_frame[k], &meta_req_width_chroma[k], &meta_req_height_chroma[k], &meta_row_width_chroma[k], @@ -2038,19 +2024,19 @@ void dml32_CalculateVMRowAndSwath( &VInitPreFillC[k], &MaxNumSwathC[k]); } else { - PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; - PTEBufferSizeInRequestsForChroma[k] = 0; - PixelPTEBytesPerRowC[k] = 0; - PDEAndMetaPTEBytesFrameC = 0; - MetaRowByteC[k] = 0; + st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; + st_vars->PTEBufferSizeInRequestsForChroma[k] = 0; + st_vars->PixelPTEBytesPerRowC[k] = 0; + st_vars->PDEAndMetaPTEBytesFrameC = 0; + st_vars->MetaRowByteC[k] = 0; MaxNumSwathC[k] = 0; PrefetchSourceLinesC[k] = 0; - dpte_row_height_chroma_one_row_per_frame[k] = 0; - dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; - 
PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; + st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0; + st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; + st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; } - PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( + st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( myPipe[k].ViewportStationary, myPipe[k].DCCEnable, myPipe[k].DPPPerSurface, @@ -2070,21 +2056,21 @@ void dml32_CalculateVMRowAndSwath( GPUVMMaxPageTableLevels, GPUVMMinPageSizeKBytes[k], HostVMMinPageSize, - PTEBufferSizeInRequestsForLuma[k], + st_vars->PTEBufferSizeInRequestsForLuma[k], myPipe[k].PitchY, myPipe[k].DCCMetaPitchY, myPipe[k].BlockWidthY, myPipe[k].BlockHeightY, /* Output */ - &MetaRowByteY[k], - &PixelPTEBytesPerRowY[k], + &st_vars->MetaRowByteY[k], + &st_vars->PixelPTEBytesPerRowY[k], &dpte_row_width_luma_ub[k], &dpte_row_height_luma[k], &dpte_row_height_linear_luma[k], - &PixelPTEBytesPerRowY_one_row_per_frame[k], - &dpte_row_width_luma_ub_one_row_per_frame[k], - &dpte_row_height_luma_one_row_per_frame[k], + &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k], + &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k], + &st_vars->dpte_row_height_luma_one_row_per_frame[k], &meta_req_width[k], &meta_req_height[k], &meta_row_width[k], @@ -2112,19 +2098,19 @@ void dml32_CalculateVMRowAndSwath( &VInitPreFillY[k], &MaxNumSwathY[k]); - PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; - MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; + PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC; + MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k]; - if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && - PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { + if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] && + st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) { PTEBufferSizeNotExceeded[k] = true; } else { PTEBufferSizeNotExceeded[k] = false; } - one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * - PTEBufferSizeInRequestsForLuma[k] && - PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); + st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * + st_vars->PTEBufferSizeInRequestsForLuma[k] && + st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]); } dml32_CalculateMALLUseForStaticScreen( @@ -2132,7 +2118,7 @@ void dml32_CalculateVMRowAndSwath( MALLAllocatedForDCN, UseMALLForStaticScreen, // mode SurfaceSizeInMALL, - one_row_per_frame_fits_in_buffer, + st_vars->one_row_per_frame_fits_in_buffer, /* Output */ UsesMALLForStaticScreen); // boolen @@ -2158,13 +2144,13 @@ void dml32_CalculateVMRowAndSwath( !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); if (use_one_row_for_frame[k]) { - dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; - dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; - PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; - dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; - dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; - PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; - 
PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; + dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k]; + dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k]; + st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k]; + dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k]; + dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k]; + st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k]; + PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k]; } if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) @@ -2172,7 +2158,7 @@ void dml32_CalculateVMRowAndSwath( else DCCMetaBufferSizeNotExceeded[k] = false; - PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; + PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k]; if (use_one_row_for_frame[k]) PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; @@ -2183,11 +2169,11 @@ void dml32_CalculateVMRowAndSwath( myPipe[k].VRatioChroma, myPipe[k].DCCEnable, myPipe[k].HTotal / myPipe[k].PixelClock, - MetaRowByteY[k], MetaRowByteC[k], + st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k], meta_row_height[k], meta_row_height_chroma[k], - PixelPTEBytesPerRowY[k], - PixelPTEBytesPerRowC[k], + st_vars->PixelPTEBytesPerRowY[k], + st_vars->PixelPTEBytesPerRowC[k], dpte_row_height_luma[k], dpte_row_height_chroma[k], @@ -2203,12 +2189,12 @@ void dml32_CalculateVMRowAndSwath( dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", __func__, k, dpte_row_width_luma_ub[k]); - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]); dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", __func__, k, dpte_row_height_chroma[k]); dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", __func__, k, dpte_row_width_chroma_ub[k]); - dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); + dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]); dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", __func__, k, PTEBufferSizeNotExceeded[k]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 158cfa2af1af3..974006e5ecb73 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -362,6 +362,7 @@ void dml32_CalculateSurfaceSizeInMall( bool *ExceededMALLSize); void dml32_CalculateVMRowAndSwath( + struct dml32_CalculateVMRowAndSwath *st_vars, unsigned int NumberOfActiveSurfaces, DmlPipe myPipe[], unsigned int SurfaceSizeInMALL[], diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 6cf814c2cc8de..b326184cfa4a2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -196,6 +196,24 @@ struct dml32_CalculateSwathAndDETConfiguration { unsigned int 
DETBufferSizeInKByteForSwathCalculation;
 };
 
+struct dml32_CalculateVMRowAndSwath {
+	unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
+	unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
+	unsigned int PDEAndMetaPTEBytesFrameY;
+	unsigned int PDEAndMetaPTEBytesFrameC;
+	unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
+	unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
+	unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
+	unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
+	unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
+	unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
+	unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+	unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
+	unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
+	unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
+	bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
+};
+
 struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation {
 	unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX];
 	double dummy_single_array[2][DC__NUM_DPP__MAX];
@@ -268,6 +286,7 @@ struct dummy_vars {
 		DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation;
 	struct dml32_ModeSupportAndSystemConfigurationFull dml32_ModeSupportAndSystemConfigurationFull;
 	struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration;
+	struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath;
 };
 
 struct vba_vars_st {

From 3c3abac60117cfd09460980d9a14c253b37f7b00 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Wed, 20 Jul 2022 23:50:08 -0400
Subject: [PATCH 098/134] drm/amd/display: reduce stack for
 dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport

Move stack variables to a dummy structure.
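
The transformation is the same mechanical pattern in every patch of this
series: locals that previously occupied the function's stack frame become
members of a scratch struct that is embedded in the long-lived dummy_vars
state and passed in by the caller. A minimal sketch, with hypothetical
names rather than the actual DML code:

/* Hypothetical illustration of the stack-reduction pattern; the names
 * (calc_scratch, bytes_per_row, calc_*) are illustrative only.
 * num_pipes <= DC__NUM_DPP__MAX is assumed.
 */
struct calc_scratch {
	unsigned int bytes_per_row[DC__NUM_DPP__MAX];	/* was on the stack */
};

/* Before: the array costs DC__NUM_DPP__MAX * sizeof(unsigned int)
 * bytes of stack in this frame.
 */
static unsigned int calc_before(unsigned int num_pipes)
{
	unsigned int bytes_per_row[DC__NUM_DPP__MAX];
	unsigned int k, total = 0;

	for (k = 0; k < num_pipes; ++k) {
		bytes_per_row[k] = 64 * k;
		total += bytes_per_row[k];
	}
	return total;
}

/* After: the caller provides scratch space and every access is
 * rewritten to go through st_vars, so the function's own frame
 * stays small.
 */
static unsigned int calc_after(struct calc_scratch *st_vars,
			       unsigned int num_pipes)
{
	unsigned int k, total = 0;

	for (k = 0; k < num_pipes; ++k) {
		st_vars->bytes_per_row[k] = 64 * k;
		total += st_vars->bytes_per_row[k];
	}
	return total;
}

Callers pass a pointer into v->dummy_vars (here, the
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport member), which
assumes, as the existing dummy_vars users already do, that these
calculations never run concurrently on the same display_mode_lib
instance: the scratch storage is shared, not reentrant.
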
Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: Stephen Rothwell --- .../dc/dml/dcn32/display_mode_vba_32.c | 2 + .../dc/dml/dcn32/display_mode_vba_util_32.c | 187 ++++++++---------- .../dc/dml/dcn32/display_mode_vba_util_32.h | 1 + .../drm/amd/display/dc/dml/display_mode_vba.h | 34 ++++ 4 files changed, 120 insertions(+), 104 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 0ecc9e4c52a68..f7d108123b072 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1165,6 +1165,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport, mode_lib->vba.USRRetrainingRequiredFinal, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], @@ -3493,6 +3494,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l { dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + &v->dummy_vars.dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport, mode_lib->vba.USRRetrainingRequiredFinal, mode_lib->vba.UsesMALLForPStateChange, mode_lib->vba.PrefetchModePerState[i][j], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 9ebd3207ce42b..40b4c88ff2e7d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -4185,6 +4185,7 @@ void dml32_CalculateFlipSchedule( } // CalculateFlipSchedule void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, bool USRRetrainingRequiredFinal, enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int PrefetchMode, @@ -4246,37 +4247,15 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( double ActiveDRAMClockChangeLatencyMargin[]) { unsigned int i, j, k; - unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; - unsigned int DRAMClockChangeSupportNumber = 0; - unsigned int LastSurfaceWithoutMargin; - unsigned int DRAMClockChangeMethod = 0; - bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; - double MinActiveFCLKChangeMargin = 0.; - double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; - double ActiveClockChangeLatencyHidingY; - double ActiveClockChangeLatencyHidingC; - double ActiveClockChangeLatencyHiding; - double EffectiveDETBufferSizeY; - double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; - double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; - double TotalPixelBW = 0.0; - bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; - double EffectiveLBLatencyHidingY; - double EffectiveLBLatencyHidingC; - double LinesInDETY[DC__NUM_DPP__MAX]; - double LinesInDETC[DC__NUM_DPP__MAX]; - unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; - unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; - double FullDETBufferingTimeY; - double FullDETBufferingTimeC; - double 
WritebackDRAMClockChangeLatencyMargin; - double WritebackFCLKChangeLatencyMargin; - double WritebackLatencyHiding; - bool SameTimingForFCLKChange; - - unsigned int TotalActiveWriteback = 0; - unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; - unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; + + st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0; + st_vars->DRAMClockChangeSupportNumber = 0; + st_vars->DRAMClockChangeMethod = 0; + st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; + st_vars->MinActiveFCLKChangeMargin = 0.; + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; + st_vars->TotalPixelBW = 0.0; + st_vars->TotalActiveWriteback = 0; Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency @@ -4308,13 +4287,13 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( #endif - TotalActiveWriteback = 0; + st_vars->TotalActiveWriteback = 0; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (WritebackEnable[k] == true) - TotalActiveWriteback = TotalActiveWriteback + 1; + st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1; } - if (TotalActiveWriteback <= 1) { + if (st_vars->TotalActiveWriteback <= 1) { Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; } else { Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency @@ -4324,7 +4303,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark + mmSOCParameters.USRRetrainingLatency; - if (TotalActiveWriteback <= 1) { + if (st_vars->TotalActiveWriteback <= 1) { Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.WritebackLatency; Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency @@ -4354,14 +4333,14 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( #endif for (k = 0; k < NumberOfActiveSurfaces; ++k) { - TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + + st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]); } for (k = 0; k < NumberOfActiveSurfaces; ++k) { - LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); - LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); + st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); + st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); #ifdef __DML_VBA_DEBUG__ @@ -4372,72 +4351,72 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]); #endif - EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); - EffectiveLBLatencyHidingC = 
LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); - EffectiveDETBufferSizeY = DETBufferSizeY[k]; + st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); + st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); + st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k]; if (UnboundedRequestEnabled) { - EffectiveDETBufferSizeY = EffectiveDETBufferSizeY + st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY + CompressedBufferSizeInkByte * 1024 * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k]) - / (HTotal[k] / PixelClock[k]) / TotalPixelBW; + / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW; } - LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; - LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); - FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; + st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; + st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]); + st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; - ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY + st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; if (NumberOfActiveSurfaces > 1) { - ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY + st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; } if (BytePerPixelDETC[k] > 0) { - LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; - LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); - FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) + st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; + st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]); + st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; - ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC + st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; if (NumberOfActiveSurfaces > 1) { - ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC + st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; } - ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, - ActiveClockChangeLatencyHidingC); + st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY, + st_vars->ActiveClockChangeLatencyHidingC); } else { - ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; + st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY; } - 
ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark + ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - Watermark->DRAMClockChangeWatermark; - ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark + st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark - Watermark->FCLKChangeWatermark; - USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; + st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; if (WritebackEnable[k]) { - WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 + st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); if (WritebackPixelFormat[k] == dm_444_64) - WritebackLatencyHiding = WritebackLatencyHiding / 2; + st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2; - WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding + st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding - Watermark->WritebackDRAMClockChangeWatermark; - WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding + st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding - Watermark->WritebackFCLKChangeWatermark; ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], - WritebackFCLKChangeLatencyMargin); - ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], - WritebackDRAMClockChangeLatencyMargin); + st_vars->WritebackFCLKChangeLatencyMargin); + st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k], + st_vars->WritebackDRAMClockChangeLatencyMargin); } MaxActiveDRAMClockChangeLatencySupported[k] = (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? 
@@ -4456,41 +4435,41 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] && VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && (DRRDisplay[i] || DRRDisplay[j]))) { - SynchronizedSurfaces[i][j] = true; + st_vars->SynchronizedSurfaces[i][j] = true; } else { - SynchronizedSurfaces[i][j] = false; + st_vars->SynchronizedSurfaces[i][j] = false; } } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || - ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { - FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; - MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; - SurfaceWithMinActiveFCLKChangeMargin = k; + (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin || + st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) { + st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; + st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k]; + st_vars->SurfaceWithMinActiveFCLKChangeMargin = k; } } - *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; + *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; - SameTimingForFCLKChange = true; + st_vars->SameTimingForFCLKChange = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { - if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { + if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (SameTimingForFCLKChange || - ActiveFCLKChangeLatencyMargin[k] < - SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { - SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; + (st_vars->SameTimingForFCLKChange || + st_vars->ActiveFCLKChangeLatencyMargin[k] < + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { + st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = st_vars->ActiveFCLKChangeLatencyMargin[k]; } - SameTimingForFCLKChange = false; + st_vars->SameTimingForFCLKChange = false; } } - if (MinActiveFCLKChangeMargin > 0) { + if (st_vars->MinActiveFCLKChangeMargin > 0) { *FCLKChangeSupport = dm_fclock_change_vactive; - } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && + } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && (PrefetchMode <= 1)) { *FCLKChangeSupport = dm_fclock_change_vblank; } else { @@ -4500,7 +4479,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( *USRRetrainingSupport = true; for (k = 0; k < NumberOfActiveSurfaces; ++k) { if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && - (USRRetrainingLatencyMargin[k] < 0)) { + (st_vars->USRRetrainingLatencyMargin[k] < 0)) { *USRRetrainingSupport = false; } } @@ -4511,42 +4490,42 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && ActiveDRAMClockChangeLatencyMargin[k] < 0) { if (PrefetchMode > 0) { - DRAMClockChangeSupportNumber = 2; - } else if (DRAMClockChangeSupportNumber == 0) { - DRAMClockChangeSupportNumber = 1; - LastSurfaceWithoutMargin = k; - } else if (DRAMClockChangeSupportNumber 
== 1 && - !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { - DRAMClockChangeSupportNumber = 2; + st_vars->DRAMClockChangeSupportNumber = 2; + } else if (st_vars->DRAMClockChangeSupportNumber == 0) { + st_vars->DRAMClockChangeSupportNumber = 1; + st_vars->LastSurfaceWithoutMargin = k; + } else if (st_vars->DRAMClockChangeSupportNumber == 1 && + !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) { + st_vars->DRAMClockChangeSupportNumber = 2; } } } for (k = 0; k < NumberOfActiveSurfaces; ++k) { if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) - DRAMClockChangeMethod = 1; + st_vars->DRAMClockChangeMethod = 1; else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) - DRAMClockChangeMethod = 2; + st_vars->DRAMClockChangeMethod = 2; } - if (DRAMClockChangeMethod == 0) { - if (DRAMClockChangeSupportNumber == 0) + if (st_vars->DRAMClockChangeMethod == 0) { + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive; - else if (DRAMClockChangeSupportNumber == 1) + else if (st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; - } else if (DRAMClockChangeMethod == 1) { - if (DRAMClockChangeSupportNumber == 0) + } else if (st_vars->DRAMClockChangeMethod == 1) { + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; - else if (DRAMClockChangeSupportNumber == 1) + else if (st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; } else { - if (DRAMClockChangeSupportNumber == 0) + if (st_vars->DRAMClockChangeSupportNumber == 0) *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; - else if (DRAMClockChangeSupportNumber == 1) + else if (st_vars->DRAMClockChangeSupportNumber == 1) *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; else *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; @@ -4560,7 +4539,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1); src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]); - src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; + src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k]; sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k]; #ifdef __DML_VBA_DEBUG__ @@ -4568,7 +4547,7 @@ dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DET dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); -dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); +dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]); dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); 
dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); @@ -4579,7 +4558,7 @@ dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l if (BytePerPixelDETC[k] > 0) { src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]); - src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; + src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k]; sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k]; SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 974006e5ecb73..ecd4f1e7d1f79 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -810,6 +810,7 @@ void dml32_CalculateFlipSchedule( bool *ImmediateFlipSupportedForPipe); void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( + struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, bool USRRetrainingRequiredFinal, enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], unsigned int PrefetchMode, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index b326184cfa4a2..42e4e4c5e656e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -214,6 +214,39 @@ struct dml32_CalculateVMRowAndSwath { bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; }; +struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport { + unsigned int SurfaceWithMinActiveFCLKChangeMargin; + unsigned int DRAMClockChangeSupportNumber; + unsigned int LastSurfaceWithoutMargin; + unsigned int DRAMClockChangeMethod; + bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin; + double MinActiveFCLKChangeMargin; + double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank; + double ActiveClockChangeLatencyHidingY; + double ActiveClockChangeLatencyHidingC; + double ActiveClockChangeLatencyHiding; + double EffectiveDETBufferSizeY; + double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; + double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; + double TotalPixelBW; + bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; + double EffectiveLBLatencyHidingY; + double EffectiveLBLatencyHidingC; + double LinesInDETY[DC__NUM_DPP__MAX]; + double LinesInDETC[DC__NUM_DPP__MAX]; + unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; + unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; + double FullDETBufferingTimeY; + double FullDETBufferingTimeC; + double WritebackDRAMClockChangeLatencyMargin; + double WritebackFCLKChangeLatencyMargin; + double WritebackLatencyHiding; + bool SameTimingForFCLKChange; + unsigned int TotalActiveWriteback; + unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; + unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; +}; + struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation { unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX]; double dummy_single_array[2][DC__NUM_DPP__MAX]; @@ -287,6 +320,7 @@ struct dummy_vars { struct dml32_ModeSupportAndSystemConfigurationFull 
dml32_ModeSupportAndSystemConfigurationFull; struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration; struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath; + struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport; }; struct vba_vars_st { From 86e4863e67a9bd1e257f162f3d740ebb61206c91 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 21 Jul 2022 00:35:35 -0400 Subject: [PATCH 099/134] drm/amd/display: reduce stack for dml32_CalculatePrefetchSchedule Move stack variables to dummy structure. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: Stephen Rothwell --- .../dc/dml/dcn32/display_mode_vba_32.c | 5 +- .../dc/dml/dcn32/display_mode_vba_util_32.c | 394 ++++++++---------- .../dc/dml/dcn32/display_mode_vba_util_32.h | 1 + .../drm/amd/display/dc/dml/display_mode_vba.h | 38 ++ 4 files changed, 227 insertions(+), 211 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index f7d108123b072..db3e43499a26d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -757,7 +757,9 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; - v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, + v->ErrorResult[k] = dml32_CalculatePrefetchSchedule( + &v->dummy_vars.dml32_CalculatePrefetchSchedule, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelaySCL, @@ -3195,6 +3197,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NoTimeForPrefetch[i][j][k] = dml32_CalculatePrefetchSchedule( + &v->dummy_vars.dml32_CalculatePrefetchSchedule, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, mode_lib->vba.DSCDelayPerState[i][k], diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 40b4c88ff2e7d..4b010b1b8aed2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -3342,6 +3342,7 @@ double dml32_CalculateExtraLatency( } // CalculateExtraLatency bool dml32_CalculatePrefetchSchedule( + struct dml32_CalculatePrefetchSchedule *st_vars, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, @@ -3405,45 +3406,18 @@ bool dml32_CalculatePrefetchSchedule( 
double *VReadyOffsetPix) { bool MyError = false; - unsigned int DPPCycles, DISPCLKCycles; - double DSTTotalPixelsAfterScaler; - double LineTime; - double dst_y_prefetch_equ; - double prefetch_bw_oto; - double Tvm_oto; - double Tr0_oto; - double Tvm_oto_lines; - double Tr0_oto_lines; - double dst_y_prefetch_oto; - double TimeForFetchingMetaPTE = 0; - double TimeForFetchingRowInVBlank = 0; - double LinesToRequestPrefetchPixelData = 0; - unsigned int HostVMDynamicLevelsTrips; - double trip_to_mem; - double Tvm_trips; - double Tr0_trips; - double Tvm_trips_rounded; - double Tr0_trips_rounded; - double Lsw_oto; - double Tpre_rounded; - double prefetch_bw_equ; - double Tvm_equ; - double Tr0_equ; - double Tdmbf; - double Tdmec; - double Tdmsks; - double prefetch_sw_bytes; - double bytes_pp; - double dep_bytes; - unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; - double min_Lsw; - double Tsw_est1 = 0; - double Tsw_est3 = 0; + + st_vars->TimeForFetchingMetaPTE = 0; + st_vars->TimeForFetchingRowInVBlank = 0; + st_vars->LinesToRequestPrefetchPixelData = 0; + st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__; + st_vars->Tsw_est1 = 0; + st_vars->Tsw_est3 = 0; if (GPUVMEnable == true && HostVMEnable == true) - HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; + st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; else - HostVMDynamicLevelsTrips = 0; + st_vars->HostVMDynamicLevelsTrips = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels); @@ -3466,19 +3440,19 @@ bool dml32_CalculatePrefetchSchedule( TSetup, /* output */ - &Tdmbf, - &Tdmec, - &Tdmsks, + &st_vars->Tdmbf, + &st_vars->Tdmec, + &st_vars->Tdmsks, VUpdateOffsetPix, VUpdateWidthPix, VReadyOffsetPix); - LineTime = myPipe->HTotal / myPipe->PixelClock; - trip_to_mem = UrgentLatency; - Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); + st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock; + st_vars->trip_to_mem = UrgentLatency; + st_vars->Tvm_trips = UrgentExtraLatency + st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); if (DynamicMetadataVMEnabled == true) - *Tdmdl = TWait + Tvm_trips + trip_to_mem; + *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem; else *Tdmdl = TWait + UrgentExtraLatency; @@ -3488,15 +3462,15 @@ bool dml32_CalculatePrefetchSchedule( #endif if (DynamicMetadataEnable == true) { - if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { + if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) { *NotEnoughTimeForDynamicMetadata = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", - __func__, Tdmbf); - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); + __func__, st_vars->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", - __func__, Tdmsks); + __func__, st_vars->Tdmsks); dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); #endif @@ -3508,21 +3482,21 @@ bool dml32_CalculatePrefetchSchedule( } *Tdmdl_vm = (DynamicMetadataEnable == 
true && DynamicMetadataVMEnabled == true && - GPUVMEnable == true ? TWait + Tvm_trips : 0); + GPUVMEnable == true ? TWait + st_vars->Tvm_trips : 0); if (myPipe->ScalerEnabled) - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; + st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; else - DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; + st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; - DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; + st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; - DISPCLKCycles = DISPCLKDelaySubtotal; + st_vars->DISPCLKCycles = DISPCLKDelaySubtotal; if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) return true; - *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * + *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles * myPipe->PixelClock / myPipe->Dispclk + DSCDelay; *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) @@ -3532,10 +3506,10 @@ bool dml32_CalculatePrefetchSchedule( + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); + dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles); dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); - dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); + dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles); dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); @@ -3548,9 +3522,9 @@ bool dml32_CalculatePrefetchSchedule( else *DSTYAfterScaler = 0; - DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; - *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); - *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); + st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; + *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); + *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); @@ -3558,132 +3532,132 @@ bool dml32_CalculatePrefetchSchedule( MyError = false; - Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); + st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1); if (GPUVMEnable == true) { - Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; - Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; + st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; if (GPUVMPageTableLevels >= 3) { - *Tno_bw = UrgentExtraLatency + trip_to_mem * - (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); + *Tno_bw = 
UrgentExtraLatency + st_vars->trip_to_mem * + (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) { - Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / - 4.0 * LineTime; // VBA_ERROR + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) / + 4.0 * st_vars->LineTime; // VBA_ERROR *Tno_bw = UrgentExtraLatency; } else { *Tno_bw = 0; } } else if (myPipe->DCCEnable == true) { - Tvm_trips_rounded = LineTime / 4.0; - Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; + st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; + st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; *Tno_bw = 0; } else { - Tvm_trips_rounded = LineTime / 4.0; - Tr0_trips_rounded = LineTime / 2.0; + st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; + st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0; *Tno_bw = 0; } - Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); - Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); + st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0); + st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0); if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) { - bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; + st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; } else { - bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; + st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; } - prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; - prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, - prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); + st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, + st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime)); - min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; - min_Lsw = dml_max(min_Lsw, 1.0); - Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; + st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre; + st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0); + st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0; if (GPUVMEnable == true) { - Tvm_oto = dml_max3( - Tvm_trips, - *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, - LineTime / 4.0); + st_vars->Tvm_oto = dml_max3( + st_vars->Tvm_trips, + *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto, + st_vars->LineTime / 4.0); } else - Tvm_oto = LineTime / 4.0; + st_vars->Tvm_oto = st_vars->LineTime / 4.0; if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { - Tr0_oto = dml_max4( - Tr0_trips, - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, - (LineTime - 
Tvm_oto)/2.0, - LineTime / 4.0); + st_vars->Tr0_oto = dml_max4( + st_vars->Tr0_trips, + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto, + (st_vars->LineTime - st_vars->Tvm_oto)/2.0, + st_vars->LineTime / 4.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, - (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); - dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); - dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); - dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); + (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips); + dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto); + dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4); #endif } else - Tr0_oto = (LineTime - Tvm_oto) / 2.0; + st_vars->Tr0_oto = (st_vars->LineTime - st_vars->Tvm_oto) / 2.0; - Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; - Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; - dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; + st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0; + st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0; + st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto; - dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - + st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime - (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); - dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); + dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw); dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); - dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); + dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem); dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); - dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); - dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); + dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes); + dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp); dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); - dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); - dml_print("DML::%s: Tr0_trips = %f\n", __func__, 
Tr0_trips); - dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); - dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); - dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); - dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); - dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); - dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); - dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); - dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); + dml_print("DML::%s: Tvm_trips = %f\n", __func__, st_vars->Tvm_trips); + dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips); + dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto); + dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto); + dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto); + dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines); + dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines); + dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto); + dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto); + dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ); #endif - dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; - Tpre_rounded = dst_y_prefetch_equ * LineTime; + st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0; + st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime; #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); - dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); + dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ); + dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", - __func__, VStartup * LineTime); + __func__, VStartup * st_vars->LineTime); dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); - dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); - dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); + dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf); + dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", __func__, *DSTYAfterScaler); #endif - dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, + st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); - if (prefetch_sw_bytes < dep_bytes) - prefetch_sw_bytes = 2 * dep_bytes; + if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes) + st_vars->prefetch_sw_bytes = 
2 * st_vars->dep_bytes; *PrefetchBandwidth = 0; *DestinationLinesToRequestVMInVBlank = 0; @@ -3691,61 +3665,61 @@ bool dml32_CalculatePrefetchSchedule( *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; - if (dst_y_prefetch_equ > 1) { + if (st_vars->dst_y_prefetch_equ > 1) { double PrefetchBandwidth1; double PrefetchBandwidth2; double PrefetchBandwidth3; double PrefetchBandwidth4; - if (Tpre_rounded - *Tno_bw > 0) { + if (st_vars->Tpre_rounded - *Tno_bw > 0) { PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor - + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); - Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; + + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw); + st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1; } else PrefetchBandwidth1 = 0; - if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) - && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { + if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw) + && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) { PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); + / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw); } - if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) - PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / - (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); + if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0) + PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) / + (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded); else PrefetchBandwidth2 = 0; - if (Tpre_rounded - Tvm_trips_rounded > 0) { + if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor - + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); - Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; + + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded); + st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3; } else PrefetchBandwidth3 = 0; if (VStartup == MaxVStartup && - (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * - LineTime - Tvm_trips_rounded > 0) { + (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * + st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); + / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded); } - if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { - PrefetchBandwidth4 = prefetch_sw_bytes / - (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); + if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) { + PrefetchBandwidth4 = st_vars->prefetch_sw_bytes / + (st_vars->Tpre_rounded 
- st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded); } else { PrefetchBandwidth4 = 0; } #ifdef __DML_VBA_DEBUG__ - dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); + dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded); dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); - dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); - dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); - dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); + dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded); + dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1); + dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3); dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); @@ -3758,9 +3732,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth1 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 - >= Tvm_trips_rounded + >= st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / PrefetchBandwidth1 >= Tr0_trips_rounded) { + / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) { Case1OK = true; } else { Case1OK = false; @@ -3771,9 +3745,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth2 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 - >= Tvm_trips_rounded + >= st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) - / PrefetchBandwidth2 < Tr0_trips_rounded) { + / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) { Case2OK = true; } else { Case2OK = false; @@ -3784,9 +3758,9 @@ bool dml32_CalculatePrefetchSchedule( if (PrefetchBandwidth3 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < - Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * + st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= - Tr0_trips_rounded) { + st_vars->Tr0_trips_rounded) { Case3OK = true; } else { Case3OK = false; @@ -3796,80 +3770,80 @@ bool dml32_CalculatePrefetchSchedule( } if (Case1OK) - prefetch_bw_equ = PrefetchBandwidth1; + st_vars->prefetch_bw_equ = PrefetchBandwidth1; else if (Case2OK) - prefetch_bw_equ = PrefetchBandwidth2; + st_vars->prefetch_bw_equ = PrefetchBandwidth2; else if (Case3OK) - prefetch_bw_equ = PrefetchBandwidth3; + st_vars->prefetch_bw_equ = PrefetchBandwidth3; else - prefetch_bw_equ = PrefetchBandwidth4; + st_vars->prefetch_bw_equ = PrefetchBandwidth4; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); - dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); + dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ); #endif - if (prefetch_bw_equ > 0) { + if (st_vars->prefetch_bw_equ > 0) { if (GPUVMEnable == true) { - Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * - HostVMInefficiencyFactor / prefetch_bw_equ, - Tvm_trips, LineTime / 4); + st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * + HostVMInefficiencyFactor / st_vars->prefetch_bw_equ, + st_vars->Tvm_trips, st_vars->LineTime / 4); } else { - 
Tvm_equ = LineTime / 4; + st_vars->Tvm_equ = st_vars->LineTime / 4; } if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { - Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * - HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, - (LineTime - Tvm_equ) / 2, LineTime / 4); + st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * + HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips, + (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4); } else { - Tr0_equ = (LineTime - Tvm_equ) / 2; + st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2; } } else { - Tvm_equ = 0; - Tr0_equ = 0; + st_vars->Tvm_equ = 0; + st_vars->Tr0_equ = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); #endif } } - if (dst_y_prefetch_oto < dst_y_prefetch_equ) { - *DestinationLinesForPrefetch = dst_y_prefetch_oto; - TimeForFetchingMetaPTE = Tvm_oto; - TimeForFetchingRowInVBlank = Tr0_oto; - *PrefetchBandwidth = prefetch_bw_oto; + if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) { + *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto; + st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto; + st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto; + *PrefetchBandwidth = st_vars->prefetch_bw_oto; } else { - *DestinationLinesForPrefetch = dst_y_prefetch_equ; - TimeForFetchingMetaPTE = Tvm_equ; - TimeForFetchingRowInVBlank = Tr0_equ; - *PrefetchBandwidth = prefetch_bw_equ; + *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ; + st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ; + st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ; + *PrefetchBandwidth = st_vars->prefetch_bw_equ; } - *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; + *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0; *DestinationLinesToRequestRowInVBlank = - dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; + dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0; - LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - + st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); - dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); - dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); + dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData); #endif - if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { - *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; + if 
(st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) { + *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData; *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); @@ -3877,12 +3851,12 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); #endif if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { - if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { + if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { *VRatioPrefetchY = dml_max((double) PrefetchSourceLinesY / - LinesToRequestPrefetchPixelData, + st_vars->LinesToRequestPrefetchPixelData, (double) MaxNumSwathY * SwathHeightY / - (LinesToRequestPrefetchPixelData - + (st_vars->LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); } else { @@ -3896,7 +3870,7 @@ bool dml32_CalculatePrefetchSchedule( #endif } - *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; + *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData; *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); #ifdef __DML_VBA_DEBUG__ @@ -3905,11 +3879,11 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); #endif if ((SwathHeightC > 4)) { - if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { + if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { *VRatioPrefetchC = dml_max(*VRatioPrefetchC, (double) MaxNumSwathC * SwathHeightC / - (LinesToRequestPrefetchPixelData - + (st_vars->LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); } else { @@ -3924,25 +3898,25 @@ bool dml32_CalculatePrefetchSchedule( } *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY - / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub - / LineTime; + / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub + / st_vars->LineTime; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma); #endif *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / - LinesToRequestPrefetchPixelData + st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC - * swath_width_chroma_ub / LineTime; + * swath_width_chroma_ub / st_vars->LineTime; } else { MyError = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML:%s: MyErr set. 
LinesToRequestPrefetchPixelData: %f, should be > 0\n", - __func__, LinesToRequestPrefetchPixelData); + __func__, st_vars->LinesToRequestPrefetchPixelData); #endif *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; @@ -3951,15 +3925,15 @@ bool dml32_CalculatePrefetchSchedule( } #ifdef __DML_VBA_DEBUG__ dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", - (double)LinesToRequestPrefetchPixelData * LineTime + - 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); - dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); + (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime + + 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE); + dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE); dml_print("DML: To: %fus - time for propagation from scaler to optc\n", - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); + (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime); dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); - dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - - TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + - ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); + dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime - + st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler + + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup); dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); #endif @@ -3967,7 +3941,7 @@ bool dml32_CalculatePrefetchSchedule( MyError = true; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", - __func__, dst_y_prefetch_equ); + __func__, st_vars->dst_y_prefetch_equ); #endif } @@ -3983,10 +3957,10 @@ bool dml32_CalculatePrefetchSchedule( dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); - dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); + dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); #endif prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / - (*DestinationLinesToRequestVMInVBlank * LineTime); + (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif @@ -4003,7 +3977,7 @@ bool dml32_CalculatePrefetchSchedule( prefetch_row_bw = 0; } else if (*DestinationLinesToRequestRowInVBlank > 0) { prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / - (*DestinationLinesToRequestRowInVBlank * LineTime); + (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); @@ -4026,12 +4000,12 @@ bool dml32_CalculatePrefetchSchedule( if (MyError) { *PrefetchBandwidth = 0; - TimeForFetchingMetaPTE = 0; - TimeForFetchingRowInVBlank = 0; + st_vars->TimeForFetchingMetaPTE = 0; + st_vars->TimeForFetchingRowInVBlank = 0; 
*DestinationLinesToRequestVMInVBlank = 0; *DestinationLinesToRequestRowInVBlank = 0; *DestinationLinesForPrefetch = 0; - LinesToRequestPrefetchPixelData = 0; + st_vars->LinesToRequestPrefetchPixelData = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index ecd4f1e7d1f79..37a314ce284b2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -715,6 +715,7 @@ double dml32_CalculateExtraLatency( unsigned int HostVMMaxNonCachedPageTableLevels); bool dml32_CalculatePrefetchSchedule( + struct dml32_CalculatePrefetchSchedule *st_vars, double HostVMInefficiencyFactor, DmlPipe *myPipe, unsigned int DSCDelay, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 42e4e4c5e656e..8460aefe7b6d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -247,6 +247,43 @@ struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport { unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; }; +struct dml32_CalculatePrefetchSchedule { + unsigned int DPPCycles, DISPCLKCycles; + double DSTTotalPixelsAfterScaler; + double LineTime; + double dst_y_prefetch_equ; + double prefetch_bw_oto; + double Tvm_oto; + double Tr0_oto; + double Tvm_oto_lines; + double Tr0_oto_lines; + double dst_y_prefetch_oto; + double TimeForFetchingMetaPTE; + double TimeForFetchingRowInVBlank; + double LinesToRequestPrefetchPixelData; + unsigned int HostVMDynamicLevelsTrips; + double trip_to_mem; + double Tvm_trips; + double Tr0_trips; + double Tvm_trips_rounded; + double Tr0_trips_rounded; + double Lsw_oto; + double Tpre_rounded; + double prefetch_bw_equ; + double Tvm_equ; + double Tr0_equ; + double Tdmbf; + double Tdmec; + double Tdmsks; + double prefetch_sw_bytes; + double bytes_pp; + double dep_bytes; + unsigned int max_vratio_pre; + double min_Lsw; + double Tsw_est1; + double Tsw_est3; +}; + struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation { unsigned int dummy_integer_array[2][DC__NUM_DPP__MAX]; double dummy_single_array[2][DC__NUM_DPP__MAX]; @@ -321,6 +358,7 @@ struct dummy_vars { struct dml32_CalculateSwathAndDETConfiguration dml32_CalculateSwathAndDETConfiguration; struct dml32_CalculateVMRowAndSwath dml32_CalculateVMRowAndSwath; struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport; + struct dml32_CalculatePrefetchSchedule dml32_CalculatePrefetchSchedule; }; struct vba_vars_st { From 9dd4545f65679042273a93054ec3bb665ecf7366 Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Thu, 21 Jul 2022 14:09:46 +0800 Subject: [PATCH 100/134] drm/amd: Fix typo 'the the' in comment Replace 'the the' with 'the' in the comment. Signed-off-by: Slark Xiao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +- drivers/gpu/drm/amd/include/atombios.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index ff659d4f772b3..8adeb7469f1e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -47,7 +47,7 @@ * for GPU/CPU synchronization. 
When the fence is written, * it is expected that all buffers associated with that fence * are no longer in use by the associated ring on the GPU and - * that the the relevant GPU caches have been flushed. + * that the relevant GPU caches have been flushed. */ struct amdgpu_fence { diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h index 1f9df4e7509b5..15943bc21bc54 100644 --- a/drivers/gpu/drm/amd/include/atombios.h +++ b/drivers/gpu/drm/amd/include/atombios.h @@ -3255,8 +3255,8 @@ ucMaxNBVoltageHigh: Voltage regulator dependent PWM value. High 8 bits of t ucMinNBVoltageHigh: Voltage regulator dependent PWM value. High 8 bits of the value for the min voltage.Set this one to 0x00 if VC without PWM or no VC at all. -usInterNBVoltageLow: Voltage regulator dependent PWM value. The value makes the the voltage >=Min NB voltage but <=InterNBVoltageHigh. Set this to 0x0000 if VC without PWM or no VC at all. -usInterNBVoltageHigh: Voltage regulator dependent PWM value. The value makes the the voltage >=InterNBVoltageLow but <=Max NB voltage.Set this to 0x0000 if VC without PWM or no VC at all. +usInterNBVoltageLow: Voltage regulator dependent PWM value. The value makes the voltage >=Min NB voltage but <=InterNBVoltageHigh. Set this to 0x0000 if VC without PWM or no VC at all. +usInterNBVoltageHigh: Voltage regulator dependent PWM value. The value makes the voltage >=InterNBVoltageLow but <=Max NB voltage.Set this to 0x0000 if VC without PWM or no VC at all. */ From 3616d49da5dab635c4bb0eeb183b61df72cd0757 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 19 Apr 2022 14:17:05 +0800 Subject: [PATCH 101/134] drm/amdgpu: enable swiotlb for gmc 10.0 (V2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable swiotlb for gmc 10.0. v2: include drm_cache.h to use the function ‘drm_need_swiotlb’ Signed-off-by: Aaron Liu Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 1772f006c61ad..9ae8cdaa033ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -22,6 +22,9 @@ */ #include <linux/firmware.h> #include <linux/pci.h> + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v10_0.h" @@ -980,6 +983,8 @@ static int gmc_v10_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v10_0_mc_init(adev); if (r) return r; From 4c5aa594928f97593502a66d5a9075f5f5dd064b Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Mon, 25 Jul 2022 08:45:56 +0800 Subject: [PATCH 102/134] drm/amdgpu: enable swiotlb for gmc 11.0 Enable swiotlb for gmc 11.0.
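As with the gmc 10.0 patch above, the whole change is a one-line hint to TTM during sw_init. A minimal sketch of the decision being delegated, assuming drm_need_swiotlb() (from drm_cache.h, per the v2 note above) reports whether DMA limited to the given bit width can still reach all of system memory:

/* Sketch: let the DRM core decide whether bounce buffering is needed.
 * drm_need_swiotlb(44) is expected to return true when RAM extends
 * beyond the GMC's 44-bit DMA reach (or when memory encryption forces
 * all DMA through SWIOTLB); TTM then allocates accordingly.
 */
adev->need_swiotlb = drm_need_swiotlb(44);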
Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 503e40a903191..1471bfb9ae38e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -22,6 +22,9 @@ */ #include <linux/firmware.h> #include <linux/pci.h> + +#include <drm/drm_cache.h> + #include "amdgpu.h" #include "amdgpu_atomfirmware.h" #include "gmc_v11_0.h" @@ -775,6 +778,8 @@ static int gmc_v11_0_sw_init(void *handle) return r; } + adev->need_swiotlb = drm_need_swiotlb(44); + r = gmc_v11_0_mc_init(adev); if (r) return r; From 8de297dc046c180651c0500f8611663ae1c3828a Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Mon, 11 Jul 2022 10:03:03 -0400 Subject: [PATCH 103/134] drm/amd/display: Avoid MPC infinite loop [why] In some cases the MPC tree bottom pipe ends up pointing to itself. This causes iterating from top to bottom to hang the system in an infinite loop. [how] When looping to the next MPC bottom pipe, check that the pointer is not the same as the current one to avoid an infinite loop. Reviewed-by: Josip Pavic Reviewed-by: Jun Lei Acked-by: Alex Hung Signed-off-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 11019c2c62ccb..769974375b4b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -49,6 +49,11 @@ void mpc1_set_bg_color(struct mpc *mpc, /* find bottommost mpcc. */ while (bottommost_mpcc->mpcc_bot) { + /* avoid a circular linked list */ + ASSERT(bottommost_mpcc != bottommost_mpcc->mpcc_bot); + if (bottommost_mpcc == bottommost_mpcc->mpcc_bot) + break; + bottommost_mpcc = bottommost_mpcc->mpcc_bot; } From 44584b417a698bfaac0c2577e7cc4015ea7359ce Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 6 Jul 2022 16:26:40 -0400 Subject: [PATCH 104/134] drm/amd/display: Add enable/disable FIFO callbacks to stream setup [Why] We don't write out attributes after disabling and re-enabling the link on some monitors, causing some, but not all, HDMI displays to fail to light up on DCN314. [How] Firmware used to do this after DIG link setup. Since firmware is no longer doing this to support USB4 and dynamic link remapping, we'll need to add this to the driver in the equivalent paths. New optional callbacks were created in the stream encoder interface and implementations were added for DCN314.
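A minimal sketch of the resulting call shape in the common DIO link path, using only the hook names from the diff below; encoders that predate the DIG resync FIFO simply leave the pointers NULL:

/* Optional hooks on stream_encoder_funcs: the shared setup/teardown
 * code only invokes them when an encoder implements them, so only
 * DCN314 (which provides both) picks up the FIFO sequencing.
 */
if (stream_enc->funcs->enable_fifo)
	stream_enc->funcs->enable_fifo(stream_enc);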
Reviewed-by: Michael Strauss Acked-by: Alex Hung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/dcn314/dcn314_dio_stream_encoder.c | 38 +++++++++++++------ .../amd/display/dc/inc/hw/stream_encoder.h | 2 + .../drm/amd/display/dc/link/link_hwss_dio.c | 7 ++++ 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c index 2dbfa1c234ddd..b384f30395d39 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c @@ -50,6 +50,26 @@ enc1->base.ctx +static void enc314_enable_fifo(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + /* TODO: Confirm if we need to wait for DIG_SYMCLK_FE_ON */ + REG_WAIT(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, 1, 10, 5000); + REG_UPDATE_2(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1, DIG_FIFO_READ_START_LEVEL, 0x7); + REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000); + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0); + REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000); + REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1); +} + +static void enc314_disable_fifo(struct stream_encoder *enc) +{ + struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); + + REG_UPDATE_2(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 0, + DIG_FIFO_READ_START_LEVEL, 0); +} static void enc314_dp_set_odm_combine( struct stream_encoder *enc, @@ -92,7 +112,7 @@ void enc314_stream_encoder_dvi_set_stream_attribute( //DIG_SOURCE_SELECT is already set in dig_connect_to_otg - /* DIG_START is removed from the register spec */ + enc314_enable_fifo(enc); } ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB); @@ -132,7 +152,7 @@ static void enc314_stream_encoder_hdmi_set_stream_attribute( //DIG_SOURCE_SELECT is already set in dig_connect_to_otg - /* DIG_START is removed from the register spec */ + enc314_enable_fifo(enc); } /* Configure pixel encoding */ @@ -302,16 +322,8 @@ static void enc314_stream_encoder_dp_unblank( REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0); - /* - * DIG Resync FIFO now needs to be explicitly enabled. - * TODO: Confirm if we need to wait for DIG_SYMCLK_FE_ON - */ - REG_WAIT(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, 1, 10, 5000); - REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1); - REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000); - REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0); - REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000); - REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1); + /* DIG Resync FIFO now needs to be explicitly enabled. */ + enc314_enable_fifo(enc); /* wait 100us for DIG/DP logic to prime * (i.e. 
a few video lines) @@ -420,6 +432,8 @@ static const struct stream_encoder_funcs dcn314_str_enc_funcs = { .set_dynamic_metadata = enc2_set_dynamic_metadata, .hdmi_reset_stream_attribute = enc1_reset_hdmi_stream_attribute, + .enable_fifo = enc314_enable_fifo, + .disable_fifo = enc314_disable_fifo, .set_input_mode = enc314_set_dig_input_mode, }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 456dbe9f2264f..42afa1952890e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -252,6 +252,8 @@ struct stream_encoder_funcs { void (*set_input_mode)( struct stream_encoder *enc, unsigned int pix_per_container); + void (*enable_fifo)(struct stream_encoder *enc); + void (*disable_fifo)(struct stream_encoder *enc); }; struct hpo_dp_stream_encoder_state { diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c index 776e822abcbb2..5e92019539c82 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c @@ -40,17 +40,24 @@ void set_dio_throttled_vcp_size(struct pipe_ctx *pipe_ctx, void setup_dio_stream_encoder(struct pipe_ctx *pipe_ctx) { struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); + struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; link_enc->funcs->connect_dig_be_to_fe(link_enc, pipe_ctx->stream_res.stream_enc->id, true); if (dc_is_dp_signal(pipe_ctx->stream->signal)) dp_source_sequence_trace(pipe_ctx->stream->link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_BE); + if (stream_enc->funcs->enable_fifo) + stream_enc->funcs->enable_fifo(stream_enc); } void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx) { struct link_encoder *link_enc = link_enc_cfg_get_link_enc(pipe_ctx->stream->link); + struct stream_encoder *stream_enc = pipe_ctx->stream_res.stream_enc; + + if (stream_enc && stream_enc->funcs->disable_fifo) + stream_enc->funcs->disable_fifo(stream_enc); link_enc->funcs->connect_dig_be_to_fe( link_enc, From 2ce0b2186c057a54a4d980b296bd1659d0091716 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 7 Jul 2022 10:19:53 -0400 Subject: [PATCH 105/134] drm/amd/display: Fix OPTC function pointers for DCN314 [Why] Access violation occurs when swapping between HDMI and FRL monitors because we're missing the immediate_disable_crtc callback and it's required for the DCN314 clk manager. 
[How] Update the table to match the DCN31 optc functions for ones that should be the same: - immediate_disable_crtc - configure_crc Reviewed-by: Michael Strauss Acked-by: Alex Hung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h | 2 ++ drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index c4304f25ce953..2f7404a974790 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -141,7 +141,7 @@ static bool optc31_disable_crtc(struct timing_generator *optc) return true; } -static bool optc31_immediate_disable_crtc(struct timing_generator *optc) +bool optc31_immediate_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h index 3706e6f7880ee..30b81a448ce2d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h @@ -258,6 +258,8 @@ void dcn31_timing_generator_init(struct optc *optc1); +bool optc31_immediate_disable_crtc(struct timing_generator *optc); + void optc31_set_drr(struct timing_generator *optc, const struct drr_params *params); void optc3_init_odm(struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c index 436c3545a9831..3011f9e2f35c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c @@ -170,6 +170,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .program_global_sync = optc1_program_global_sync, .enable_crtc = optc314_enable_crtc, .disable_crtc = optc314_disable_crtc, + .immediate_disable_crtc = optc31_immediate_disable_crtc, .phantom_crtc_post_enable = optc314_phantom_crtc_post_enable, /* used by enable_timing_synchronization. 
Not need for FPGA */ .is_counter_moving = optc1_is_counter_moving, @@ -204,7 +205,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .clear_optc_underflow = optc1_clear_optc_underflow, .setup_global_swap_lock = NULL, .get_crc = optc1_get_crc, - .configure_crc = optc1_configure_crc, + .configure_crc = optc2_configure_crc, .set_dsc_config = optc3_set_dsc_config, .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = NULL, From 319568d75f5f91cd4f362b26e65af2a4437c64bf Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Tue, 12 Jul 2022 15:49:19 -0400 Subject: [PATCH 106/134] drm/amd/display: Updates SubVP and SubVP DRR cases [Description] - For any DRR cases in SubVP, don't lock for VSYNC flips - For DCN32/321 use FW to do DRR manual trigger programming - Add bit in SubVP cmd to indicate if the SubVP pipe is DRR Reviewed-by: Jun Lei Acked-by: Alex Hung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 17 +++++ drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 1 + .../drm/amd/display/dc/dcn32/dcn32_hwseq.c | 7 +-- .../gpu/drm/amd/display/dc/dcn32/dcn32_optc.c | 63 ++++++++++++++++++- 4 files changed, 82 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 39b426d04037c..2d61c2a91cee2 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -270,6 +270,23 @@ void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); } +void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst) +{ + union dmub_rb_cmd cmd = { 0 }; + + cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; + // TODO: Uncomment once FW headers are promoted + //cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER; + cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst; + + cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header); + + // Send the command to the DMCUB. 
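/* An illustrative aside, not a line of this patch: the DMUB helpers
 * below follow DC's usual queue -> execute -> wait-idle sequence, so
 * the firmware has consumed the command before the driver programs
 * any OPTC state that depends on it.
 */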
+ dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd); + dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv); + dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); +} + static uint8_t dc_dmub_srv_get_pipes_for_stream(struct dc *dc, struct dc_stream_state *stream) { uint8_t pipes = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 1d124a2695d56..159782cd6659f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -74,6 +74,7 @@ void dc_dmub_trace_event_control(struct dc *dc, bool enable); void dc_dmub_srv_drr_update_cmd(struct dc *dc, uint32_t tg_inst, uint32_t vtotal_min, uint32_t vtotal_max); +void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst); bool dc_dmub_srv_p_state_delegate(struct dc *dc, bool enable_pstate, struct dc_state *context); void dc_dmub_srv_query_caps_cmd(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index b6bada383958d..bf9ac9dfc7ddb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -424,7 +424,6 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc, unsigned int i = 0; bool subvp_immediate_flip = false; bool subvp_in_use = false; - bool drr_pipe = false; struct pipe_ctx *pipe; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -440,12 +439,10 @@ void dcn32_subvp_pipe_control_lock(struct dc *dc, if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_MAIN && top_pipe_to_program->plane_state->flip_immediate) subvp_immediate_flip = true; - else if (top_pipe_to_program->stream->mall_stream_config.type == SUBVP_NONE && - top_pipe_to_program->stream->ignore_msa_timing_param) - drr_pipe = true; } - if ((subvp_in_use && (should_lock_all_pipes || subvp_immediate_flip || drr_pipe)) || (!subvp_in_use && subvp_prev_use)) { + // Don't need to lock for DRR VSYNC flips -- FW will wait for DRR pending update cleared. + if ((subvp_in_use && (should_lock_all_pipes || subvp_immediate_flip)) || (!subvp_in_use && subvp_prev_use)) { union dmub_inbox0_cmd_lock_hw hw_lock_cmd = { 0 }; if (!lock) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index 00ff21458a535..992e56c6907ee 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -26,9 +26,11 @@ #include "dcn32_optc.h" #include "dcn30/dcn30_optc.h" +#include "dcn31/dcn31_optc.h" #include "reg_helper.h" #include "dc.h" #include "dcn_calc_math.h" +#include "dc_dmub_srv.h" #define REG(reg)\ optc1->tg_regs->reg @@ -188,6 +190,65 @@ static void optc32_set_odm_bypass(struct timing_generator *optc, optc1->opp_count = 1; } +void optc32_setup_manual_trigger(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + struct dc *dc = optc->ctx->dc; + + if (dc->caps.dmub_caps.mclk_sw && !dc->debug.disable_fams) + dc_dmub_srv_set_drr_manual_trigger_cmd(dc, optc->inst); + else { + /* + * MIN_MASK_EN is gone and MASK is now always enabled. + * + * To get it to work with manual trigger we need to make sure + * we program the correct bit.
+ */ + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_V_TOTAL_MIN_SEL, 1, + OTG_V_TOTAL_MAX_SEL, 1, + OTG_FORCE_LOCK_ON_EVENT, 0, + OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */ + + // Setup manual flow control for EOF via TRIG_A + optc->funcs->setup_manual_trigger(optc); + } +} + +void optc32_set_drr( + struct timing_generator *optc, + const struct drr_params *params) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + if (params != NULL && + params->vertical_total_max > 0 && + params->vertical_total_min > 0) { + + if (params->vertical_total_mid != 0) { + + REG_SET(OTG_V_TOTAL_MID, 0, + OTG_V_TOTAL_MID, params->vertical_total_mid - 1); + + REG_UPDATE_2(OTG_V_TOTAL_CONTROL, + OTG_VTOTAL_MID_REPLACING_MAX_EN, 1, + OTG_VTOTAL_MID_FRAME_NUM, + (uint8_t)params->vertical_total_mid_frame_num); + + } + + optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1); + optc32_setup_manual_trigger(optc); + } else { + REG_UPDATE_4(OTG_V_TOTAL_CONTROL, + OTG_SET_V_TOTAL_MIN_MASK, 0, + OTG_V_TOTAL_MIN_SEL, 0, + OTG_V_TOTAL_MAX_SEL, 0, + OTG_FORCE_LOCK_ON_EVENT, 0); + + optc->funcs->set_vtotal_min_max(optc, 0, 0); + } +} static struct timing_generator_funcs dcn32_tg_funcs = { .validate_timing = optc1_validate_timing, @@ -221,7 +282,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = { .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, .enable_optc_clock = optc1_enable_optc_clock, .set_vrr_m_const = optc3_set_vrr_m_const, - .set_drr = optc1_set_drr, + .set_drr = optc31_set_drr, // TODO: Update to optc32_set_drr once FW headers are promoted .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc3_set_vtotal_min_max, .set_static_screen_control = optc1_set_static_screen_control, From a7cefb0b40dcfdafedc54a3ba659327d0336956d Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 12 Jul 2022 14:32:45 -0400 Subject: [PATCH 107/134] drm/amd/display: Guard against zero memory channels [Why] If BIOS doesn't specify number of memory channels then bandwidth validation will fail due to insufficient BW in DML. [How] If BIOS is setting zero channels then use the default in the table. If no entry is in the table and no BIOS value is specified then throw an ASSERT for future developers to look into. Reviewed-by: Michael Strauss Acked-by: Alex Hung Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 450ebd8385056..56ada096c89de 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1916,8 +1916,11 @@ static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; - dcn3_14_soc.num_chans = bw_params->num_channels; + if (bw_params->num_channels > 0) + dcn3_14_soc.num_chans = bw_params->num_channels; + + ASSERT(dcn3_14_soc.num_chans); ASSERT(clk_table->num_entries); /* Prepass to find max clocks independent of voltage level. 
*/ From eb54e014367e1700fa5b94e4c788520081a70661 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Tue, 12 Jul 2022 05:54:38 -0400 Subject: [PATCH 108/134] drm/amd/display: Fix two MPO videos in single display ODM combine mode [Why] In single display ODM combine mode, two MPO videos ( three planes ) are not working [How] When we detect three planes, don't set odm combine 2to1 policy for the MPO planes. Otherwise, we run out of pipes available Add support for two MPO videos in dc_add_plane_to_context(). Don't allow both videos to be on the same side of the display. Add extra check when fetching free pipe for two MPO videos. Reviewed-by: Alvin Lee Acked-by: Alex Hung Signed-off-by: Samson Tam Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_resource.c | 136 +++++++++++++--- .../drm/amd/display/dc/dcn32/dcn32_resource.c | 149 ++++++++++++++++-- .../drm/amd/display/dc/dcn32/dcn32_resource.h | 6 + 3 files changed, 260 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 752ba4ab2b1e3..ffc0f1c0ea93b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1463,6 +1463,7 @@ bool dc_add_plane_to_context( struct dc_stream_status *stream_status = NULL; struct pipe_ctx *prev_right_head = NULL; struct pipe_ctx *free_right_pipe = NULL; + struct pipe_ctx *prev_left_head = NULL; DC_LOGGER_INIT(stream->ctx->logger); for (i = 0; i < context->stream_count; i++) @@ -1514,8 +1515,16 @@ bool dc_add_plane_to_context( /* ODM + window MPO, where MPO window is on right half only */ if (free_pipe->plane_state && - (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2) && - tail_pipe->next_odm_pipe) { + (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2) && + tail_pipe->next_odm_pipe) { + + /* For ODM + window MPO, in 3 plane case, if we already have a MPO window on + * the right side, then we will invalidate a 2nd one on the right side + */ + if (head_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) { + dc_plane_state_release(plane_state); + return false; + } DC_LOG_SCALER("%s - ODM + window MPO(right). free_pipe:%d tail_pipe->next_odm_pipe:%d\n", __func__, @@ -1530,20 +1539,42 @@ bool dc_add_plane_to_context( * - If not, continue to use free_pipe * - If the right side already has a pipe, use that pipe instead if its available */ + + /* + * We also want to avoid the case where with three plane ( 2 MPO videos ), we have + * both videos on the left side so one of the videos is invalidated. Then we + * move the invalidated video back to the right side. If the order of the plane + * states is such that the right MPO plane is processed first, the free pipe + * selected by the head will be the left MPO pipe. But since there was no right + * MPO pipe, it will assign the free pipe to the right MPO pipe instead and + * a pipe reallocation will occur. 
+ * Check the old context to see if the left side already has a pipe allocated + * - If not, continue to use free_pipe + * - If the left side is already using this pipe, then pick another pipe for right + */ + + prev_right_head = &dc->current_state->res_ctx.pipe_ctx[tail_pipe->next_odm_pipe->pipe_idx]; - if ((prev_right_head->bottom_pipe) && (free_pipe->pipe_idx != prev_right_head->bottom_pipe->pipe_idx)) { + if ((prev_right_head->bottom_pipe) && + (free_pipe->pipe_idx != prev_right_head->bottom_pipe->pipe_idx)) { free_right_pipe = acquire_free_pipe_for_head(context, pool, tail_pipe->next_odm_pipe); - if (free_right_pipe) { - free_pipe->stream = NULL; - memset(&free_pipe->stream_res, 0, sizeof(struct stream_resource)); - memset(&free_pipe->plane_res, 0, sizeof(struct plane_resource)); - free_pipe->plane_state = NULL; - free_pipe->pipe_idx = 0; - free_right_pipe->plane_state = plane_state; - free_pipe = free_right_pipe; + } else { + prev_left_head = &dc->current_state->res_ctx.pipe_ctx[head_pipe->pipe_idx]; + if ((prev_left_head->bottom_pipe) && + (free_pipe->pipe_idx == prev_left_head->bottom_pipe->pipe_idx)) { + free_right_pipe = acquire_free_pipe_for_head(context, pool, head_pipe); } } + if (free_right_pipe) { + free_pipe->stream = NULL; + memset(&free_pipe->stream_res, 0, sizeof(struct stream_resource)); + memset(&free_pipe->plane_res, 0, sizeof(struct plane_resource)); + free_pipe->plane_state = NULL; + free_pipe->pipe_idx = 0; + free_right_pipe->plane_state = plane_state; + free_pipe = free_right_pipe; + } + free_pipe->stream_res.tg = tail_pipe->next_odm_pipe->stream_res.tg; free_pipe->stream_res.abm = tail_pipe->next_odm_pipe->stream_res.abm; free_pipe->stream_res.opp = tail_pipe->next_odm_pipe->stream_res.opp; @@ -1553,7 +1584,63 @@ bool dc_add_plane_to_context( free_pipe->top_pipe = tail_pipe->next_odm_pipe; tail_pipe->next_odm_pipe->bottom_pipe = free_pipe; + } else if (free_pipe->plane_state && + (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2) + && head_pipe->next_odm_pipe) { + + /* For ODM + window MPO, support 3 plane ( 2 MPO ) case. + * Here we have a desktop ODM + left window MPO and a new MPO window appears + * on the right side only. It fails the first case, because tail_pipe is the + * left window MPO, so it has no next_odm_pipe. So in this scenario, we check + * for head_pipe->next_odm_pipe instead + */ + DC_LOG_SCALER("%s - ODM + win MPO (left) + win MPO (right). free_pipe:%d head_pipe->next_odm:%d\n", + __func__, + free_pipe->pipe_idx, + head_pipe->next_odm_pipe ? head_pipe->next_odm_pipe->pipe_idx : -1); + + /* + * We want to avoid the case where the right side already has a pipe assigned to + * it and is different from free_pipe ( which would trigger a pipe + * reallocation ). 
+ * Check the old context to see if the right side already has a pipe allocated + * - If not, continue to use free_pipe + * - If the right side already has a pipe, use that pipe instead if its available + */ + prev_right_head = &dc->current_state->res_ctx.pipe_ctx[head_pipe->next_odm_pipe->pipe_idx]; + if ((prev_right_head->bottom_pipe) && + (free_pipe->pipe_idx != prev_right_head->bottom_pipe->pipe_idx)) { + free_right_pipe = acquire_free_pipe_for_head(context, pool, head_pipe->next_odm_pipe); + if (free_right_pipe) { + free_pipe->stream = NULL; + memset(&free_pipe->stream_res, 0, sizeof(struct stream_resource)); + memset(&free_pipe->plane_res, 0, sizeof(struct plane_resource)); + free_pipe->plane_state = NULL; + free_pipe->pipe_idx = 0; + free_right_pipe->plane_state = plane_state; + free_pipe = free_right_pipe; + } + } + + free_pipe->stream_res.tg = head_pipe->next_odm_pipe->stream_res.tg; + free_pipe->stream_res.abm = head_pipe->next_odm_pipe->stream_res.abm; + free_pipe->stream_res.opp = head_pipe->next_odm_pipe->stream_res.opp; + free_pipe->stream_res.stream_enc = head_pipe->next_odm_pipe->stream_res.stream_enc; + free_pipe->stream_res.audio = head_pipe->next_odm_pipe->stream_res.audio; + free_pipe->clock_source = head_pipe->next_odm_pipe->clock_source; + + free_pipe->top_pipe = head_pipe->next_odm_pipe; + head_pipe->next_odm_pipe->bottom_pipe = free_pipe; } else { + + /* For ODM + window MPO, in 3 plane case, if we already have a MPO window on + * the left side, then we will invalidate a 2nd one on the left side + */ + if (head_pipe->next_odm_pipe && tail_pipe->top_pipe) { + dc_plane_state_release(plane_state); + return false; + } + free_pipe->stream_res.tg = tail_pipe->stream_res.tg; free_pipe->stream_res.abm = tail_pipe->stream_res.abm; free_pipe->stream_res.opp = tail_pipe->stream_res.opp; @@ -1564,21 +1651,28 @@ bool dc_add_plane_to_context( free_pipe->top_pipe = tail_pipe; tail_pipe->bottom_pipe = free_pipe; - if (!free_pipe->next_odm_pipe && tail_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) { - free_pipe->next_odm_pipe = tail_pipe->next_odm_pipe->bottom_pipe; - tail_pipe->next_odm_pipe->bottom_pipe->prev_odm_pipe = free_pipe; - } - if (!free_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe->bottom_pipe) { - free_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe; - tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = free_pipe; + /* Connect MPO pipes together if MPO window is in the centre */ + if (!(free_pipe->plane_state && + (free_pipe->plane_state->clip_rect.x + free_pipe->plane_state->clip_rect.width <= + free_pipe->stream->src.x + free_pipe->stream->src.width/2))) { + if (!free_pipe->next_odm_pipe && + tail_pipe->next_odm_pipe && tail_pipe->next_odm_pipe->bottom_pipe) { + free_pipe->next_odm_pipe = tail_pipe->next_odm_pipe->bottom_pipe; + tail_pipe->next_odm_pipe->bottom_pipe->prev_odm_pipe = free_pipe; + } + if (!free_pipe->prev_odm_pipe && + tail_pipe->prev_odm_pipe && tail_pipe->prev_odm_pipe->bottom_pipe) { + free_pipe->prev_odm_pipe = tail_pipe->prev_odm_pipe->bottom_pipe; + tail_pipe->prev_odm_pipe->bottom_pipe->next_odm_pipe = free_pipe; + } } } } /* ODM + window MPO, where MPO window is on left half only */ if (free_pipe->plane_state && - (free_pipe->plane_state->clip_rect.x + free_pipe->plane_state->clip_rect.width <= - free_pipe->stream->src.x + free_pipe->stream->src.width/2)) { + (free_pipe->plane_state->clip_rect.x + free_pipe->plane_state->clip_rect.width <= + free_pipe->stream->src.x + 
free_pipe->stream->src.width/2)) { DC_LOG_SCALER("%s - ODM + window MPO(left). free_pipe:%d\n", __func__, free_pipe->pipe_idx); @@ -1586,7 +1680,7 @@ bool dc_add_plane_to_context( } /* ODM + window MPO, where MPO window is on right half only */ if (free_pipe->plane_state && - (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2)) { + (free_pipe->plane_state->clip_rect.x >= free_pipe->stream->src.x + free_pipe->stream->src.width/2)) { DC_LOG_SCALER("%s - ODM + window MPO(right). free_pipe:%d\n", __func__, free_pipe->pipe_idx); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index e551d2936d03d..314dec5712b56 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1820,11 +1820,12 @@ int dcn32_populate_dml_pipes_from_context( struct resource_context *res_ctx = &context->res_ctx; struct pipe_ctx *pipe; bool subvp_in_use = false, is_pipe_split_expected[MAX_PIPES]; + int plane_count = 0; + struct dc_crtc_timing *timing; dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing; if (!res_ctx->pipe_ctx[i].stream) continue; @@ -1876,11 +1877,12 @@ int dcn32_populate_dml_pipes_from_context( } } - pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; - if (context->stream_count == 1) { - if (dc->debug.enable_single_display_2to1_odm_policy) - pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; - } + /* Calculate the number of planes we have so we can determine + * whether to apply ODM 2to1 policy or not + */ + if (pipe->stream && !pipe->prev_odm_pipe && + (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) + ++plane_count; DC_FP_START(); is_pipe_split_expected[i] = dcn32_predict_pipe_split(context, pipes[i].pipe, i); @@ -1889,6 +1891,28 @@ int dcn32_populate_dml_pipes_from_context( pipe_cnt++; } + /* Determine whether we will apply ODM 2to1 policy + * Applies to single display and where the number of planes is less than 3 + * For 3 plane case ( 2 MPO planes ), we will not set the policy for the MPO pipes + */ + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal; + res_ctx->pipe_ctx[i].stream->odm_2to1_policy_applied = false; + if (context->stream_count == 1 && timing->dsc_cfg.num_slices_h != 1) { + if (dc->debug.enable_single_display_2to1_odm_policy) { + if (!((plane_count > 2) && pipe->top_pipe)) + pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1; + } + res_ctx->pipe_ctx[i].stream->odm_2to1_policy_applied = true; + } + pipe_cnt++; + } + /* For DET allocation, we don't want to use DML policy (not optimal for utilizing all * the DET available for each pipe). Use the DET override input to maintain our driver * policy. 
@@ -1947,7 +1971,7 @@ static struct resource_funcs dcn32_res_pool_funcs = { .validate_bandwidth = dcn32_validate_bandwidth, .calculate_wm_and_dlg = dcn32_calculate_wm_and_dlg, .populate_dml_pipes = dcn32_populate_dml_pipes_from_context, - .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, + .acquire_idle_pipe_for_head_pipe_in_layer = dcn32_acquire_idle_pipe_for_head_pipe_in_layer, .add_stream_to_ctx = dcn30_add_stream_to_ctx, .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, @@ -1976,7 +2000,7 @@ static bool dcn32_resource_construct( uint32_t pipe_fuses = 0; uint32_t num_pipes = 4; - DC_FP_START(); + DC_FP_START(); ctx->dc_bios->regs = &bios_regs; @@ -2316,13 +2340,13 @@ static bool dcn32_resource_construct( pool->base.oem_device = NULL; } - DC_FP_END(); + DC_FP_END(); return true; create_fail: - DC_FP_END(); + DC_FP_END(); dcn32_resource_destruct(pool); @@ -2346,3 +2370,108 @@ struct resource_pool *dcn32_create_resource_pool( kfree(pool); return NULL; } + +static struct pipe_ctx *find_idle_secondary_pipe_check_mpo( + struct resource_context *res_ctx, + const struct resource_pool *pool, + const struct pipe_ctx *primary_pipe) +{ + int i; + struct pipe_ctx *secondary_pipe = NULL; + struct pipe_ctx *next_odm_mpo_pipe = NULL; + int primary_index, preferred_pipe_idx; + struct pipe_ctx *old_primary_pipe = NULL; + + /* + * Modified from find_idle_secondary_pipe + * With windowed MPO and ODM, we want to avoid the case where we want a + * free pipe for the left side but the free pipe is being used on the + * right side. + * Add check on current_state if the primary_pipe is the left side, + * to check the right side ( primary_pipe->next_odm_pipe ) to see if + * it is using a pipe for MPO ( primary_pipe->next_odm_pipe->bottom_pipe ) + * - If so, then don't use this pipe + * EXCEPTION - 3 plane ( 2 MPO plane ) case + * - in this case, the primary pipe has already gotten a free pipe for the + * MPO window in the left + * - when it tries to get a free pipe for the MPO window on the right, + * it will see that it is already assigned to the right side + * ( primary_pipe->next_odm_pipe ). But in this case, we want this + * free pipe, since it will be for the right side. 
So add an + * additional condition, that skipping the free pipe on the right only + * applies if the primary pipe has no bottom pipe currently assigned + */ + if (primary_pipe) { + primary_index = primary_pipe->pipe_idx; + old_primary_pipe = &primary_pipe->stream->ctx->dc->current_state->res_ctx.pipe_ctx[primary_index]; + if ((old_primary_pipe->next_odm_pipe) && (old_primary_pipe->next_odm_pipe->bottom_pipe) + && (!primary_pipe->bottom_pipe)) + next_odm_mpo_pipe = old_primary_pipe->next_odm_pipe->bottom_pipe; + + preferred_pipe_idx = (pool->pipe_count - 1) - primary_pipe->pipe_idx; + if ((res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) && + !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == preferred_pipe_idx)) { + secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; + secondary_pipe->pipe_idx = preferred_pipe_idx; + } + } + + /* + * search backwards for the second pipe to keep pipe + * assignment more consistent + */ + if (!secondary_pipe) + for (i = pool->pipe_count - 1; i >= 0; i--) { + if ((res_ctx->pipe_ctx[i].stream == NULL) && + !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == i)) { + secondary_pipe = &res_ctx->pipe_ctx[i]; + secondary_pipe->pipe_idx = i; + break; + } + } + + return secondary_pipe; +} + +struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( + struct dc_state *state, + const struct resource_pool *pool, + struct dc_stream_state *stream, + struct pipe_ctx *head_pipe) +{ + struct resource_context *res_ctx = &state->res_ctx; + struct pipe_ctx *idle_pipe, *pipe; + struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx; + int head_index; + + if (!head_pipe) + ASSERT(0); + + /* + * Modified from dcn20_acquire_idle_pipe_for_layer + * Check if head_pipe in old_context already has bottom_pipe allocated. + * - If so, check if that pipe is available in the current context. 
+ * -- If so, reuse pipe from old_context + */ + head_index = head_pipe->pipe_idx; + pipe = &old_ctx->pipe_ctx[head_index]; + if (pipe->bottom_pipe && res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx].stream == NULL) { + idle_pipe = &res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx]; + idle_pipe->pipe_idx = pipe->bottom_pipe->pipe_idx; + } else { + idle_pipe = find_idle_secondary_pipe_check_mpo(res_ctx, pool, head_pipe); + if (!idle_pipe) + return NULL; + } + + idle_pipe->stream = head_pipe->stream; + idle_pipe->stream_res.tg = head_pipe->stream_res.tg; + idle_pipe->stream_res.opp = head_pipe->stream_res.opp; + + idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx]; + idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst; + + return idle_pipe; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index fc0fe48023a07..efd449804d7b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -99,6 +99,12 @@ bool dcn32_subvp_in_use(struct dc *dc, bool dcn32_mpo_in_use(struct dc_state *context); +struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer( + struct dc_state *state, + const struct resource_pool *pool, + struct dc_stream_state *stream, + struct pipe_ctx *head_pipe); + void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_params_st *pipes, bool *is_pipe_split_expected, int pipe_cnt); From 4a21ab548a7ac19b15aba9aa8df39972ed82f4ad Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Wed, 13 Jul 2022 12:33:37 -0400 Subject: [PATCH 109/134] drm/amd/display: Calculate MALL cache lines based on Mblks required [Description] - Calculation for NumWays in MALL should be based on number of MBlks Reviewed-by: Jun Lei Acked-by: Alex Hung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn32/dcn32_resource.h | 1 + .../amd/display/dc/dcn32/dcn32_resource_helpers.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h index efd449804d7b9..1e7e6201c8801 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h @@ -29,6 +29,7 @@ #include "core_types.h" #define DCN3_2_DET_SEG_SIZE 64 +#define DCN3_2_MALL_MBLK_SIZE_BYTES 65536 // 64 * 1024 #define TO_DCN32_RES_POOL(pool)\ container_of(pool, struct dcn32_resource_pool, base) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index 47caa2c6d5b4e..266c49884f044 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -51,6 +51,9 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat uint32_t cache_lines_used = 0; uint32_t lines_per_way = 0; uint32_t total_cache_lines = 0; + uint32_t bytes_in_mall = 0; + uint32_t num_mblks = 0; + uint32_t cache_lines_per_plane = 0; uint32_t i = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -61,9 +64,19 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { 
bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
 mall_region_pixels = pipe->stream->timing.h_addressable * pipe->stream->timing.v_addressable;
+
+ // For bytes required in MALL, calculate based on number of MBlks required
+ num_mblks = (mall_region_pixels * bytes_per_pixel +
+ DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES;
+ bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
 // cache lines used is total bytes / cache_line size. Add +2 for worst case alignment
 // (MALL is 64-byte aligned)
- cache_lines_used += (bytes_per_pixel * mall_region_pixels) / dc->caps.cache_line_size + 2;
+ cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2;
+
+ // For DCC we must cache the meta surface, so double cache lines required
+ if (pipe->plane_state->dcc.enable)
+ cache_lines_per_plane *= 2;
+ cache_lines_used += cache_lines_per_plane;
 }
 }

From f46e3f28f89b893685ec9204405677dac410d8ad Mon Sep 17 00:00:00 2001
From: Taimur Hassan
Date: Wed, 13 Jul 2022 17:07:43 -0400
Subject: [PATCH 110/134] drm/amd/display: Reset pipe count when iterating for DET override

[Why]
A coding error in DET allocation was resulting in too few DET segments being allocated, causing underflow.

[How]
Reset the pipe count each time we begin iterating through the pipes for a stream.

Reviewed-by: Alvin Lee
Acked-by: Alex Hung
Signed-off-by: Taimur Hassan
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index 266c49884f044..b3f8503cea9c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -184,7 +184,8 @@ void dcn32_determine_det_override(struct dc_state *context, display_e2e_pipe_par
 if (context->stream_count > 0) {
 stream_segments = 18 / context->stream_count;
- for (i = 0, count = 0; i < context->stream_count; i++) {
+ for (i = 0; i < context->stream_count; i++) {
+ count = 0;
 for (j = 0; j < pipe_cnt; j++) {
 if (context->res_ctx.pipe_ctx[j].stream == context->streams[i]) {
 count++;

From 72bb503097e79113503dc6d1d49c4063ef97970a Mon Sep 17 00:00:00 2001
From: hersen wu
Date: Thu, 14 Jul 2022 10:00:28 -0400
Subject: [PATCH 111/134] drm/amd/display: Reboot while unplug hdcp enabled dp from mst hub

[Why]
event_property_update does not check for a NULL pointer before dereferencing aconnector->base.state

[How]
Check aconnector->base.state for NULL before using it

Reviewed-by: Bhawanpreet Lakha
Acked-by: Alex Hung
Signed-off-by: hersen wu
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 34 ++++++++++++-------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index fbb252afb494d..6202e31c7e3a6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -302,7 +302,7 @@ static void event_property_update(struct work_struct *work)
 mutex_lock(&hdcp_work->mutex);
- if (aconnector->base.state->commit) {
+ if (aconnector->base.state && aconnector->base.state->commit) {
 ret = wait_for_completion_interruptible_timeout(&aconnector->base.state->commit->hw_done, 10 * HZ);
 if (ret == 0) {
@@ -311,18 +311,26 @@ static void
event_property_update(struct work_struct *work) } } - if (hdcp_work->encryption_status != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) { - if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE0 && - hdcp_work->encryption_status <= MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON) - drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED); - else if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE1 && - hdcp_work->encryption_status == MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON) - drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED); - } else { - drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_DESIRED); + if (aconnector->base.state) { + if (hdcp_work->encryption_status != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) { + if (aconnector->base.state->hdcp_content_type == + DRM_MODE_HDCP_CONTENT_TYPE0 && + hdcp_work->encryption_status <= + MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON) + drm_hdcp_update_content_protection(&aconnector->base, + DRM_MODE_CONTENT_PROTECTION_ENABLED); + else if (aconnector->base.state->hdcp_content_type == + DRM_MODE_HDCP_CONTENT_TYPE1 && + hdcp_work->encryption_status == + MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON) + drm_hdcp_update_content_protection(&aconnector->base, + DRM_MODE_CONTENT_PROTECTION_ENABLED); + } else { + drm_hdcp_update_content_protection(&aconnector->base, + DRM_MODE_CONTENT_PROTECTION_DESIRED); + } } - mutex_unlock(&hdcp_work->mutex); drm_modeset_unlock(&dev->mode_config.connection_mutex); } @@ -495,7 +503,9 @@ static void update_config(void *handle, struct cp_psp_stream_config *config) (!!aconnector->base.state) ? aconnector->base.state->content_protection : -1, (!!aconnector->base.state) ? aconnector->base.state->hdcp_content_type : -1); - hdcp_update_display(hdcp_work, link_index, aconnector, conn_state->hdcp_content_type, false); + if (conn_state) + hdcp_update_display(hdcp_work, link_index, aconnector, + conn_state->hdcp_content_type, false); } From ee4a26b4ea029aeed749c6560ed9eba224337b04 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 14 Jul 2022 12:29:12 -0400 Subject: [PATCH 112/134] drm/amd/display: Drop unnecessary FPU flags on dcn302 files We already isolated the DCN302 code in the DML folder, but we forgot to drop the FPU flags from the Makefile. This commit drops those flags. Reviewed-by: Harry Wentland Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn302/Makefile | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile index e4b69ad0dde56..ebd01cb467b79 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile @@ -7,31 +7,6 @@ DCN3_02 = dcn302_init.o dcn302_hwseq.o dcn302_resource.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_X86 -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o += -msse2 -endif -endif - AMD_DAL_DCN3_02 = $(addprefix $(AMDDALPATH)/dc/dcn302/,$(DCN3_02)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN3_02) From a4f1b04216023ff0f4cd89328b59ee6890248130 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 14 Jul 2022 18:21:28 -0400 Subject: [PATCH 113/134] drm/amd/display: Fallback to SW cursor if SubVP + cursor too big [Description] - For SubVP cursor cannot be cached in MALL, therefore we will switch to SW cursor if the cursor size exceeds what can fit in the local DCN buffers (64x64x4) - Returning false / failure for set_cursor_attributes will fallback to SW cursor Reviewed-by: Jun Lei Acked-by: Alex Hung Signed-off-by: Alvin Lee Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 6752ca44e6e0b..f62d50901d92e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -328,6 +328,11 @@ bool dc_stream_set_cursor_attributes( } dc = stream->ctx->dc; + + if (attributes->height * attributes->width * 4 > 16384) + if (stream->mall_stream_config.type == SUBVP_MAIN) + return false; + stream->cursor_attributes = *attributes; dc_z10_restore(dc); From 6a7379f196230cfa35335ec627e0c0e08da6a8b8 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 14 Jul 2022 15:11:31 -0400 Subject: [PATCH 114/134] drm/amd/display: Remove FPU operations from dcn201 resources We have some FPU operations on the resource part of the DCN201. This commit drops FPU flags and moves any required FPU code to the DML folder. Reviewed-by: Harry Wentland Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn201/Makefile | 24 ----- .../amd/display/dc/dcn201/dcn201_resource.c | 10 ++- .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c | 87 +++++++++++++++++++ .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.h | 4 + 4 files changed, 100 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile index 96cbd4ccd344c..5c9ce2cebb0f6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn201/Makefile @@ -6,30 +6,6 @@ DCN201 = dcn201_init.o dcn201_resource.o dcn201_hwseq.o \ dcn201_mpc.o dcn201_hubp.o dcn201_opp.o dcn201_optc.o dcn201_dpp.o \ dcn201_dccg.o dcn201_link_encoder.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn201/dcn201_resource.o += -msse2 -endif -endif AMD_DAL_DCN201 = $(addprefix $(AMDDALPATH)/dc/dcn201/,$(DCN201)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN201) diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c index 0bb7d3dd53fa4..e549a79f3fe1a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c @@ -1036,6 +1036,14 @@ static bool dcn201_get_dcc_compression_cap(const struct dc *dc, output); } +static void dcn201_populate_dml_writeback_from_context(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes) +{ + DC_FP_START(); + dcn201_populate_dml_writeback_from_context_fpu(dc, res_ctx, pipes); + DC_FP_END(); +} static void dcn201_destroy_resource_pool(struct resource_pool **pool) { @@ -1067,8 +1075,8 @@ static struct resource_funcs dcn201_res_pool_funcs = { .add_dsc_to_stream_resource = NULL, .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, .acquire_idle_pipe_for_layer = dcn201_acquire_idle_pipe_for_layer, + .populate_dml_writeback_from_context = dcn201_populate_dml_writeback_from_context, .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, - .populate_dml_writeback_from_context = dcn20_populate_dml_writeback_from_context, .set_mcif_arb_params = dcn20_set_mcif_arb_params, .find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index eeeae52fe6fc7..39428488a0524 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -2303,3 +2303,90 @@ void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params) bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING; bw_params->wm_table.entries[WM_D].valid = true; } + +void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc, + struct resource_context *res_ctx, + display_e2e_pipe_params_st *pipes) +{ + int pipe_cnt, i, j; + double max_calc_writeback_dispclk; + double writeback_dispclk; + struct writeback_st dout_wb; + + dc_assert_fp_enabled(); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_stream_state *stream = res_ctx->pipe_ctx[i].stream; + + if (!stream) + continue; + max_calc_writeback_dispclk = 0; + + /* Set writeback information */ + pipes[pipe_cnt].dout.wb_enable = 0; + pipes[pipe_cnt].dout.num_active_wb = 0; + for (j = 0; j < stream->num_wb_info; j++) { + struct dc_writeback_info *wb_info = &stream->writeback_info[j]; + + if (wb_info->wb_enabled && wb_info->writeback_source_plane && + (wb_info->writeback_source_plane == res_ctx->pipe_ctx[i].plane_state)) { + pipes[pipe_cnt].dout.wb_enable = 1; + pipes[pipe_cnt].dout.num_active_wb++; + dout_wb.wb_src_height = wb_info->dwb_params.cnv_params.crop_en ? + wb_info->dwb_params.cnv_params.crop_height : + wb_info->dwb_params.cnv_params.src_height; + dout_wb.wb_src_width = wb_info->dwb_params.cnv_params.crop_en ? 
+ wb_info->dwb_params.cnv_params.crop_width :
+ wb_info->dwb_params.cnv_params.src_width;
+ dout_wb.wb_dst_width = wb_info->dwb_params.dest_width;
+ dout_wb.wb_dst_height = wb_info->dwb_params.dest_height;
+ dout_wb.wb_htaps_luma = wb_info->dwb_params.scaler_taps.h_taps;
+ dout_wb.wb_vtaps_luma = wb_info->dwb_params.scaler_taps.v_taps;
+ dout_wb.wb_htaps_chroma = wb_info->dwb_params.scaler_taps.h_taps_c;
+ dout_wb.wb_vtaps_chroma = wb_info->dwb_params.scaler_taps.v_taps_c;
+ dout_wb.wb_hratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_width /
+ (double)wb_info->dwb_params.dest_width :
+ (double)wb_info->dwb_params.cnv_params.src_width /
+ (double)wb_info->dwb_params.dest_width;
+ dout_wb.wb_vratio = wb_info->dwb_params.cnv_params.crop_en ?
+ (double)wb_info->dwb_params.cnv_params.crop_height /
+ (double)wb_info->dwb_params.dest_height :
+ (double)wb_info->dwb_params.cnv_params.src_height /
+ (double)wb_info->dwb_params.dest_height;
+ if (wb_info->dwb_params.out_format == dwb_scaler_mode_yuv420) {
+ if (wb_info->dwb_params.output_depth == DWB_OUTPUT_PIXEL_DEPTH_8BPC)
+ dout_wb.wb_pixel_format = dm_420_8;
+ else
+ dout_wb.wb_pixel_format = dm_420_10;
+ } else
+ dout_wb.wb_pixel_format = dm_444_32;
+
+ /* Workaround for cases where multiple writebacks are connected to the same plane,
+ * in which case we need to compute the worst case and set the associated writeback parameters.
+ * This workaround is necessary because the DML computation assumes only 1 set of writeback
+ * parameters per pipe */
+ writeback_dispclk = CalculateWriteBackDISPCLK(
+ dout_wb.wb_pixel_format,
+ pipes[pipe_cnt].pipe.dest.pixel_rate_mhz,
+ dout_wb.wb_hratio,
+ dout_wb.wb_vratio,
+ dout_wb.wb_htaps_luma,
+ dout_wb.wb_vtaps_luma,
+ dout_wb.wb_htaps_chroma,
+ dout_wb.wb_vtaps_chroma,
+ dout_wb.wb_dst_width,
+ pipes[pipe_cnt].pipe.dest.htotal,
+ 2);
+
+ if (writeback_dispclk > max_calc_writeback_dispclk) {
+ max_calc_writeback_dispclk = writeback_dispclk;
+ pipes[pipe_cnt].dout.wb = dout_wb;
+ }
+ }
+ }
+
+ pipe_cnt++;
+ }
+
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
index a6e1ad0f38e9c..c51badf7b68a9 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.h
@@ -84,4 +84,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
 void dcn21_clk_mgr_set_bw_params_wm_table(struct clk_bw_params *bw_params);
+void dcn201_populate_dml_writeback_from_context_fpu(struct dc *dc,
+ struct resource_context *res_ctx,
+ display_e2e_pipe_params_st *pipes);
+
 #endif /* __DCN20_FPU_H__ */

From 05674cc9ea97079ad7c819628e3e56b046c2f497 Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Thu, 14 Jul 2022 15:27:58 -0400
Subject: [PATCH 115/134] drm/amd/display: Move mclk calculation function to DML

The function responsible for the MCLK switching calculation has FPU operations. This commit moves it to the dcn30_fpu file.
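A note on the pattern these DML moves follow: the FPU-using helper lives in a *_fpu file and asserts with dc_assert_fp_enabled(), while the caller wraps the call in DC_FP_START()/DC_FP_END(), as dcn201_populate_dml_writeback_from_context does above. A minimal user-space model of that contract — the counter-based macros below are only a stand-in; the real macros additionally save and restore the kernel FPU state:

#include <assert.h>
#include <stdio.h>

static int fp_depth; /* stand-in for the real per-CPU nesting counter */

#define DC_FP_START() do { fp_depth++; } while (0)
#define DC_FP_END()   do { fp_depth--; } while (0)

/* The *_fpu side: may use double math, must run inside a guarded region */
static double scale_fpu(double value, double ratio)
{
	assert(fp_depth > 0); /* models dc_assert_fp_enabled() */
	return value * ratio;
}

/* The resource side: no FPU math of its own, only the guard */
static double scale(double value, double ratio)
{
	double r;

	DC_FP_START();
	r = scale_fpu(value, ratio);
	DC_FP_END();
	return r;
}

int main(void)
{
	printf("%f\n", scale(100.0, 0.5));
	return 0;
}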
Reviewed-by: Harry Wentland Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn30/dcn30_resource.c | 38 ---------------- .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.c | 43 +++++++++++++++++++ .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.h | 6 +++ 3 files changed, 49 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 7f01463942fb0..e5e54097a07dd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -2031,44 +2031,6 @@ void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 4U * 1000U * 1000U * 1000U; } -/* - * Finds dummy_latency_index when MCLK switching using firmware based - * vblank stretch is enabled. This function will iterate through the - * table of dummy pstate latencies until the lowest value that allows - * dm_allow_self_refresh_and_mclk_switch to happen is found - */ -int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel) -{ - const int max_latency_table_entries = 4; - int dummy_latency_index = 0; - - while (dummy_latency_index < max_latency_table_entries) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = - dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; - dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); - - if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank == - dm_allow_self_refresh_and_mclk_switch) - break; - - dummy_latency_index++; - } - - if (dummy_latency_index == max_latency_table_entries) { - ASSERT(dummy_latency_index != max_latency_table_entries); - /* If the execution gets here, it means dummy p_states are - * not possible. This should never happen and would mean - * something is severely wrong. - * Here we reset dummy_latency_index to 3, because it is - * better to have underflows than system crashes. - */ - dummy_latency_index = 3; - } - - return dummy_latency_index; -} - void dcn30_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) { DC_FP_START(); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c index c00f759fddedf..9e32b45b63dc7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -616,6 +616,49 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc, } +/** + * Finds dummy_latency_index when MCLK switching using firmware based + * vblank stretch is enabled. 
This function will iterate through the
+ * table of dummy pstate latencies until the lowest value that allows
+ * dm_allow_self_refresh_and_mclk_switch to happen is found
+ */
+int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel)
+{
+ const int max_latency_table_entries = 4;
+ int dummy_latency_index = 0;
+
+ dc_assert_fp_enabled();
+
+ while (dummy_latency_index < max_latency_table_entries) {
+ context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+ dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+ dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+
+ if (context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank ==
+ dm_allow_self_refresh_and_mclk_switch)
+ break;
+
+ dummy_latency_index++;
+ }
+
+ if (dummy_latency_index == max_latency_table_entries) {
+ ASSERT(dummy_latency_index != max_latency_table_entries);
+ /* If the execution gets here, it means dummy p_states are
+ * not possible. This should never happen and would mean
+ * something is severely wrong.
+ * Here we reset dummy_latency_index to 3, because it is
+ * better to have underflows than system crashes.
+ */
+ dummy_latency_index = 3;
+ }
+
+ return dummy_latency_index;
+}
 void dcn3_fpu_build_wm_range_table(struct clk_mgr *base)
 {
 /* defaults */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
index c2024052a4977..3e4221af1c1ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
@@ -63,6 +63,12 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
 unsigned int *dcfclk_mhz,
 unsigned int *dram_speed_mts);
+int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
+ struct dc_state *context,
+ display_e2e_pipe_params_st *pipes,
+ int pipe_cnt,
+ int vlevel);
+
 void dcn3_fpu_build_wm_range_table(struct clk_mgr *base);

 #endif /* __DCN30_FPU_H__*/

From bdf4473394deffc94c16c0502e73f2daabbdc1d0 Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Thu, 14 Jul 2022 16:13:10 -0400
Subject: [PATCH 116/134] drm/amd/display: Create patch bounding box function to isolate FPU

In the DCN30 resource code, we apply a small patch to the bounding box struct; this patching uses FPU operations. This commit moves that specific part to its own function under the DML folder.
Reviewed-by: Harry Wentland Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn30/dcn30_resource.c | 19 ++---------------- .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.c | 20 +++++++++++++++++++ .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.h | 2 ++ 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index e5e54097a07dd..8bdf3573610fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1521,26 +1521,11 @@ static bool init_soc_bounding_box(struct dc *dc, loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator; loaded_ip->max_num_dpp = pool->base.pipe_count; loaded_ip->clamp_min_dcfclk = dc->config.clamp_min_dcfclk; - - DC_FP_START(); dcn20_patch_bounding_box(dc, loaded_bb); + DC_FP_START(); + patch_dcn30_soc_bounding_box(dc, &dcn3_0_soc); DC_FP_END(); - if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { - struct bp_soc_bb_info bb_info = {0}; - - if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { - if (bb_info.dram_clock_change_latency_100ns > 0) - dcn3_0_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; - - if (bb_info.dram_sr_enter_exit_latency_100ns > 0) - dcn3_0_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; - - if (bb_info.dram_sr_exit_latency_100ns > 0) - dcn3_0_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; - } - } - return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c index 9e32b45b63dc7..6dd9a70314c0e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c @@ -721,3 +721,23 @@ void dcn3_fpu_build_wm_range_table(struct clk_mgr *base) base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz; base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF; } + +void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *dcn3_0_ip) +{ + dc_assert_fp_enabled(); + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info) { + struct bp_soc_bb_info bb_info = {0}; + + if (dc->ctx->dc_bios->funcs->get_soc_bb_info(dc->ctx->dc_bios, &bb_info) == BP_RESULT_OK) { + if (bb_info.dram_clock_change_latency_100ns > 0) + dcn3_0_soc.dram_clock_change_latency_us = bb_info.dram_clock_change_latency_100ns * 10; + + if (bb_info.dram_sr_enter_exit_latency_100ns > 0) + dcn3_0_soc.sr_enter_plus_exit_time_us = bb_info.dram_sr_enter_exit_latency_100ns * 10; + + if (bb_info.dram_sr_exit_latency_100ns > 0) + dcn3_0_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h index 3e4221af1c1ef..cab864095ce7e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h @@ -71,4 +71,6 @@ int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, void dcn3_fpu_build_wm_range_table(struct clk_mgr *base); +void patch_dcn30_soc_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *dcn3_0_ip); + #endif /* __DCN30_FPU_H__*/ From 40b31e5355ba8c4f7e58f1c458bbe46763dca541 Mon Sep 17 00:00:00 2001 
From: Rodrigo Siqueira Date: Thu, 14 Jul 2022 16:32:54 -0400 Subject: [PATCH 117/134] drm/amd/display: Remove FPU flags from DCN30 Makefile At this stage, we must have all the FPU code for DCN30 isolated in the DML folder. Drop FPU flags from Makefile. Reviewed-by: Harry Wentland Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index c20331eb62e01..b7c2ae9ddfda3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -30,36 +30,6 @@ DCN30 = dcn30_init.o dcn30_hubbub.o dcn30_hubp.o dcn30_dpp.o dcn30_optc.o \ dcn30_dpp_cm.o dcn30_dwb_cm.o dcn30_cm_common.o dcn30_mmhubbub.o \ dcn30_dio_link_encoder.o dcn30_resource.o - -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -mpreferred-stack-boundary=4 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -msse2 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -msse2 -endif -endif - AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN30) From cb849b4dc73d414149fea96330cdf96a82919fc9 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Tue, 31 May 2022 16:55:32 -0400 Subject: [PATCH 118/134] drm/amd/display: Add pixel rate div calcs and programming [WHY/HOW] Need to calculate and set some pixel rate divisors on correct otg_inst Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn314/dcn314_dccg.c | 17 +++++++++ .../drm/amd/display/dc/dcn314/dcn314_hwseq.c | 36 +++++++++++++++++++ .../drm/amd/display/dc/dcn314/dcn314_hwseq.h | 2 ++ .../drm/amd/display/dc/dcn314/dcn314_init.c | 1 + 4 files changed, 56 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index ea78da9c6f8b8..3852a6d59b978 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -219,6 +219,21 @@ void dccg314_set_dpstreamclk( } } +void dccg314_set_valid_pixel_rate( + struct dccg *dccg, + int ref_dtbclk_khz, + int otg_inst, + int pixclk_khz) +{ + struct dtbclk_dto_params dto_params = {0}; + + dto_params.ref_dtbclk_khz = ref_dtbclk_khz; + dto_params.otg_inst = otg_inst; + dto_params.pixclk_khz = pixclk_khz; + + dccg314_set_dtbclk_dto(dccg, &dto_params); +} + static const struct dccg_funcs dccg314_funcs = { .update_dpp_dto = dccg31_update_dpp_dto, .get_dccg_ref_freq = dccg31_get_dccg_ref_freq, @@ -237,6 +252,8 @@ static const struct dccg_funcs dccg314_funcs = { 
.set_dispclk_change_mode = dccg31_set_dispclk_change_mode, .disable_dsc = dccg31_disable_dscclk, .enable_dsc = dccg31_enable_dscclk, + .set_pixel_rate_div = dccg314_set_pixel_rate_div, + .set_valid_pixel_rate = dccg314_set_valid_pixel_rate, }; struct dccg *dccg314_create( diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 90ec76487264c..755c715ad8dce 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -338,3 +338,39 @@ void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable) if (org_ip_request_cntl == 0) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); } + +unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div) +{ + struct dc_stream_state *stream = pipe_ctx->stream; + unsigned int odm_combine_factor = 0; + + odm_combine_factor = get_odm_config(pipe_ctx, NULL); + + if (is_dp_128b_132b_signal(pipe_ctx)) { + *k2_div = PIXEL_RATE_DIV_BY_1; + } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { + *k1_div = PIXEL_RATE_DIV_BY_1; + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) + *k2_div = PIXEL_RATE_DIV_BY_2; + else + *k2_div = PIXEL_RATE_DIV_BY_4; + } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + *k1_div = PIXEL_RATE_DIV_BY_1; + *k2_div = PIXEL_RATE_DIV_BY_2; + } else if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { + *k1_div = PIXEL_RATE_DIV_BY_2; + *k2_div = PIXEL_RATE_DIV_BY_2; + } else { + if (odm_combine_factor == 1) + *k2_div = PIXEL_RATE_DIV_BY_4; + else if (odm_combine_factor == 2) + *k2_div = PIXEL_RATE_DIV_BY_2; + } + } + + if ((*k1_div == PIXEL_RATE_DIV_NA) && (*k2_div == PIXEL_RATE_DIV_NA)) + ASSERT(false); + + return odm_combine_factor; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h index dfdd0b792a528..be0f5e4d48e13 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h @@ -37,4 +37,6 @@ void dcn314_dsc_pg_control(struct dce_hwseq *hws, unsigned int dsc_inst, bool po void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable); +unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); + #endif /* __DC_HWSS_DCN314_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c index c87b1979b2cc0..b9debeb081fdf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c @@ -144,6 +144,7 @@ static const struct hwseq_private_funcs dcn314_private_funcs = { .set_blend_lut = dcn30_set_blend_lut, .set_shaper_3dlut = dcn20_set_shaper_3dlut, .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, + .calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values, }; void dcn314_hw_sequencer_construct(struct dc *dc) From 1c5a2fa97b91d37375f4fc8aeb37c9456c93c828 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 9 Jun 2022 10:45:34 -0400 Subject: [PATCH 119/134] drm/amd/display: Use correct DTO_SRC_SEL for 128b/132b encoding [WHY] DP DTO isn't used for 128b/132b encoding [HOW] Check current link rate to determine whether using 8b/10b or 128/132b encoding 
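In other words: PIPE0_DTO_SRC_SEL moves to 2 for 128b/132b links, so the pipe takes its pixel rate from a DTBCLK instead of the unused DP DTO, and stays at 1 for 8b/10b links. A small sketch of just that rule — the enum spelling and helper name here are illustrative; the register values come from the dcn31_program_pix_clk hunk below:

#include <stdio.h>

enum dp_link_encoding {
	DP_8b_10b_ENCODING,
	DP_128b_132b_ENCODING,
};

/* 1: pixel rate sourced from the DP DTO (8b/10b);
 * 2: pixel rate sourced from a DTBCLK (128b/132b, DP DTO unused) */
static unsigned int pipe_dto_src_sel(enum dp_link_encoding encoding)
{
	return encoding == DP_128b_132b_ENCODING ? 2 : 1;
}

int main(void)
{
	printf("8b/10b    -> PIPE0_DTO_SRC_SEL = %u\n",
	       pipe_dto_src_sel(DP_8b_10b_ENCODING));
	printf("128b/132b -> PIPE0_DTO_SRC_SEL = %u\n",
	       pipe_dto_src_sel(DP_128b_132b_ENCODING));
	return 0;
}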
Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 1 + .../drm/amd/display/dc/dce/dce_clock_source.c | 20 ++++++++++++++----- .../display/dc/dce110/dce110_hw_sequencer.c | 1 + .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 1 + .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 1 + .../gpu/drm/amd/display/dc/inc/clock_source.h | 7 +++++-- 6 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index dfc74aea2852a..48dad093ae8ba 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -7064,6 +7064,7 @@ void dp_enable_link_phy( pipes[i].clock_source->funcs->program_pix_clk( pipes[i].clock_source, &pipes[i].stream_res.pix_clk_params, + dp_get_link_encoding_format(link_settings), &pipes[i].pll_settings); } } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index d55da1ab1ac2e..213de8cabfadb 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -838,6 +838,7 @@ static void dce112_program_pixel_clk_resync( static bool dce110_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -911,6 +912,7 @@ static bool dce110_program_pix_clk( static bool dce112_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -970,6 +972,7 @@ static bool dce112_program_pix_clk( static bool dcn31_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -993,9 +996,14 @@ static bool dcn31_program_pix_clk( #if defined(CONFIG_DRM_AMD_DC_DCN) /* Enable DTO */ if (clk_src->cs_mask->PIPE0_DTO_SRC_SEL) - REG_UPDATE_2(PIXEL_RATE_CNTL[inst], - DP_DTO0_ENABLE, 1, - PIPE0_DTO_SRC_SEL, 1); + if (encoding == DP_128b_132b_ENCODING) + REG_UPDATE_2(PIXEL_RATE_CNTL[inst], + DP_DTO0_ENABLE, 1, + PIPE0_DTO_SRC_SEL, 2); + else + REG_UPDATE_2(PIXEL_RATE_CNTL[inst], + DP_DTO0_ENABLE, 1, + PIPE0_DTO_SRC_SEL, 1); else REG_UPDATE(PIXEL_RATE_CNTL[inst], DP_DTO0_ENABLE, 1); @@ -1198,12 +1206,13 @@ const struct pixel_rate_range_table_entry *look_up_in_video_optimized_rate_tlb( static bool dcn20_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); unsigned int inst = pix_clk_params->controller_id - CONTROLLER_ID_D0; - dce112_program_pix_clk(clock_source, pix_clk_params, pll_settings); + dce112_program_pix_clk(clock_source, pix_clk_params, encoding, pll_settings); if (clock_source->ctx->dc->hwss.enable_vblanks_synchronization && clock_source->ctx->dc->config.vblank_alignment_max_frame_time_diff > 0) { @@ -1243,6 +1252,7 @@ static const struct clock_source_funcs dcn20_clk_src_funcs = { static bool 
dcn3_program_pix_clk( struct clock_source *clock_source, struct pixel_clk_params *pix_clk_params, + enum dp_link_encoding encoding, struct pll_settings *pll_settings) { struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(clock_source); @@ -1265,7 +1275,7 @@ static bool dcn3_program_pix_clk( REG_UPDATE(PIXEL_RATE_CNTL[inst], DP_DTO0_ENABLE, 1); } else // For other signal types(HDMI_TYPE_A, DVI) Driver still to call VBIOS Command table - dce112_program_pix_clk(clock_source, pix_clk_params, pll_settings); + dce112_program_pix_clk(clock_source, pix_clk_params, encoding, pll_settings); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index e69c942c8345b..38a67051d470f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1435,6 +1435,7 @@ static enum dc_status dce110_enable_stream_timing( if (false == pipe_ctx->clock_source->funcs->program_pix_clk( pipe_ctx->clock_source, &pipe_ctx->stream_res.pix_clk_params, + dp_get_link_encoding_format(&pipe_ctx->link_config.dp_link_settings), &pipe_ctx->pll_settings)) { BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 7a3812604e4bf..bed783747f169 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -892,6 +892,7 @@ enum dc_status dcn10_enable_stream_timing( if (false == pipe_ctx->clock_source->funcs->program_pix_clk( pipe_ctx->clock_source, &pipe_ctx->stream_res.pix_clk_params, + dp_get_link_encoding_format(&pipe_ctx->link_config.dp_link_settings), &pipe_ctx->pll_settings)) { BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 3b26962637d0c..3e44b79984295 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -700,6 +700,7 @@ enum dc_status dcn20_enable_stream_timing( if (false == pipe_ctx->clock_source->funcs->program_pix_clk( pipe_ctx->clock_source, &pipe_ctx->stream_res.pix_clk_params, + dp_get_link_encoding_format(&pipe_ctx->link_config.dp_link_settings), &pipe_ctx->pll_settings)) { BREAK_TO_DEBUGGER(); return DC_ERROR_UNEXPECTED; diff --git a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h index e2b3a2c7a9270..8f8ac8e29ed08 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/clock_source.h +++ b/drivers/gpu/drm/amd/display/dc/inc/clock_source.h @@ -160,8 +160,11 @@ struct calc_pll_clock_source { struct clock_source_funcs { bool (*cs_power_down)( struct clock_source *); - bool (*program_pix_clk)(struct clock_source *, - struct pixel_clk_params *, struct pll_settings *); + bool (*program_pix_clk)( + struct clock_source *, + struct pixel_clk_params *, + enum dp_link_encoding encoding, + struct pll_settings *); uint32_t (*get_pix_clk_dividers)( struct clock_source *, struct pixel_clk_params *, From 0d60f2b47d8312d6a88454a373990579624edbde Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 9 Jun 2022 10:48:43 -0400 Subject: [PATCH 120/134] drm/amd/display: Use correct clock source constructor for DCN314 [WHY] Previously was pointing to DCN3 clock constructor rather than DCN31's Reviewed-by: Nicholas Kazlauskas 
Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 56ada096c89de..b5672fb902364 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1782,7 +1782,7 @@ static struct clock_source *dcn31_clock_source_create( if (!clk_src) return NULL; - if (dcn3_clk_src_construct(clk_src, ctx, bios, id, + if (dcn31_clk_src_construct(clk_src, ctx, bios, id, regs, &cs_shift, &cs_mask)) { clk_src->base.dp_clk_src = dp_clk_src; return &clk_src->base; From 5ade1b951dec63a81b23c2ff75c5b4c191977620 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 9 Jun 2022 10:52:52 -0400 Subject: [PATCH 121/134] drm/amd/display: Add OTG/ODM functions [WHY] Required for correct OTG_H_TIMING_CNTL programming Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn314/dcn314_optc.c | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c index 3011f9e2f35c5..0c7980266b853 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c @@ -160,6 +160,37 @@ void optc314_phantom_crtc_post_enable(struct timing_generator *optc) REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); } +static void optc314_set_odm_bypass(struct timing_generator *optc, + const struct dc_crtc_timing *dc_crtc_timing) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + enum h_timing_div_mode h_div = H_TIMING_NO_DIV; + + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 0, + OPTC_SEG0_SRC_SEL, optc->inst, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf + ); + + h_div = optc1_is_two_pixels_per_containter(dc_crtc_timing); + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE, h_div); + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, 0); + optc1->opp_count = 1; +} + +static void optc314_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_UPDATE(OTG_H_TIMING_CNTL, + OTG_H_TIMING_DIV_MODE_MANUAL, manual_mode ? 
1 : 0); +} + static struct timing_generator_funcs dcn314_tg_funcs = { .validate_timing = optc1_validate_timing, @@ -222,6 +253,9 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .setup_manual_trigger = optc2_setup_manual_trigger, .get_hw_timing = optc1_get_hw_timing, .init_odm = optc3_init_odm, + .set_odm_bypass = optc314_set_odm_bypass, + .set_odm_combine = optc314_set_odm_combine, + .set_h_timing_div_manual_mode = optc314_set_h_timing_div_manual_mode, }; void dcn314_timing_generator_init(struct optc *optc1) From b5e924bd7cce823e604f9b7834fac846a910331c Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Thu, 9 Jun 2022 11:02:15 -0400 Subject: [PATCH 122/134] drm/amd/display: Add missing AUDIO_DTO_SEL reg field [WHY] Needed to program audio dto Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h index 99ba597bf9b73..9a4a9efc02031 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.h @@ -147,7 +147,8 @@ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P2_EN, mask_sh),\ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_SRC_SEL, mask_sh),\ DCCG_SF(DTBCLK_P_CNTL, DTBCLK_P3_EN, mask_sh),\ - DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh) + DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL, mask_sh),\ + DCCG_SF(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO_SEL, mask_sh) struct dccg *dccg314_create( struct dc_context *ctx, From 1f5dcb7365e4e0a8a6fca44352218ccc6f41a8b9 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Fri, 10 Jun 2022 16:28:03 -0400 Subject: [PATCH 123/134] drm/amd/display: Fix dpstreamclk programming [WHY] Currently programming incorrect hpo inst as well as selecting incorrect source [HOW] Use hpo inst instead of otg inst to select dpstreamclk inst Reviewed-by: Nicholas Kazlauskas Acked-by: Alex Hung Signed-off-by: Michael Strauss Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c | 8 +++++--- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h | 8 ++------ drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c | 13 +++++++------ drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c | 3 ++- drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h | 3 ++- .../gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c | 4 ++-- 6 files changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c index 799a383a26840..7f34418e63081 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c @@ -158,9 +158,11 @@ static void dccg31_disable_dpstreamclk(struct dccg *dccg, int otg_inst) } } -void dccg31_set_dpstreamclk(struct dccg *dccg, - enum streamclk_source src, - int otg_inst) +void dccg31_set_dpstreamclk( + struct dccg *dccg, + enum streamclk_source src, + int otg_inst, + int dp_hpo_inst) { if (src == REFCLK) dccg31_disable_dpstreamclk(dccg, otg_inst); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h index 32b5593b14604..0902ce5eb8a11 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h @@ -161,11 
+161,6 @@ struct dccg *dccg31_create( void dccg31_init(struct dccg *dccg); -void dccg31_set_dpstreamclk( - struct dccg *dccg, - enum streamclk_source src, - int otg_inst); - void dccg31_enable_symclk32_se( struct dccg *dccg, int hpo_se_inst, @@ -207,7 +202,8 @@ void dccg31_get_dccg_ref_freq( void dccg31_set_dpstreamclk( struct dccg *dccg, enum streamclk_source src, - int otg_inst); + int otg_inst, + int dp_hpo_inst); void dccg31_set_dtbclk_dto( struct dccg *dccg, diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 3852a6d59b978..232cc15979dda 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -184,7 +184,8 @@ void dccg314_set_dtbclk_dto( void dccg314_set_dpstreamclk( struct dccg *dccg, enum streamclk_source src, - int otg_inst) + int otg_inst, + int dp_hpo_inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); @@ -192,26 +193,26 @@ void dccg314_set_dpstreamclk( dccg314_set_dtbclk_p_src(dccg, src, otg_inst); /* enabled to select one of the DTBCLKs for pipe */ - switch (otg_inst) { + switch (dp_hpo_inst) { case 0: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK0_EN, (src == REFCLK) ? 0 : 1, - DPSTREAMCLK0_SRC_SEL, 0); + DPSTREAMCLK0_SRC_SEL, otg_inst); break; case 1: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK1_EN, (src == REFCLK) ? 0 : 1, - DPSTREAMCLK1_SRC_SEL, 1); + DPSTREAMCLK1_SRC_SEL, otg_inst); break; case 2: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK2_EN, (src == REFCLK) ? 0 : 1, - DPSTREAMCLK2_SRC_SEL, 2); + DPSTREAMCLK2_SRC_SEL, otg_inst); break; case 3: REG_UPDATE_2(DPSTREAMCLK_CNTL, DPSTREAMCLK3_EN, (src == REFCLK) ? 0 : 1, - DPSTREAMCLK3_SRC_SEL, 3); + DPSTREAMCLK3_SRC_SEL, otg_inst); break; default: BREAK_TO_DEBUGGER(); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index 12fc3afd9acd4..a31c64b50410b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -211,7 +211,8 @@ static void dccg32_get_dccg_ref_freq(struct dccg *dccg, void dccg32_set_dpstreamclk( struct dccg *dccg, enum streamclk_source src, - int otg_inst) + int otg_inst, + int dp_hpo_inst) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index c2d116cce119b..ce006762f2571 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -101,7 +101,8 @@ struct dccg_funcs { void (*set_dpstreamclk)( struct dccg *dccg, enum streamclk_source src, - int otg_inst); + int otg_inst, + int dp_hpo_inst); void (*enable_symclk32_se)( struct dccg *dccg, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c index ea6cf8bfce304..db7b0b155374a 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c @@ -116,7 +116,7 @@ static void setup_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx) dto_params.timing = &pipe_ctx->stream->timing; dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); - dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst); + dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, link_enc->inst); dccg->funcs->enable_symclk32_se(dccg, stream_enc->inst, phyd32clk); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); 
stream_enc->funcs->enable_stream(stream_enc);
@@ -137,7 +137,7 @@ static void reset_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx)
 stream_enc->funcs->disable(stream_enc);
 dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
 dccg->funcs->disable_symclk32_se(dccg, stream_enc->inst);
- dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst);
+ dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, pipe_ctx->link_res.hpo_dp_link_enc->inst);
 }

 static void setup_hpo_dp_stream_attribute(struct pipe_ctx *pipe_ctx)

From 81f776b670a0eb17bf9cfbfd2df32f66a34bc453 Mon Sep 17 00:00:00 2001
From: Anthony Koo
Date: Sat, 16 Jul 2022 23:14:01 -0400
Subject: [PATCH 124/134] drm/amd/display: Add support for manual DMUB FAMS trigger

- Add is_drr parameter to indicate DRR is enabled on the panel, to determine whether SubVP MCLK switch logic should be enabled
- Add DRR manual trigger in FW (instead of the driver) because manual trigger programming triggers DRR update pending and can block SubVP MCLK switches from taking place

Acked-by: Alex Hung
Signed-off-by: Anthony Koo
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index de193636d0226..d7f3619352f04 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -976,7 +976,8 @@ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 {
 uint16_t vtotal;
 uint8_t main_pipe_index;
 uint8_t phantom_pipe_index;
- uint8_t padding[2];
+ uint8_t is_drr;
+ uint8_t padding;
 } subvp_data;

 struct {
@@ -1579,6 +1580,12 @@ enum dmub_cmd_fams_type {
 DMUB_CMD__FAMS_SETUP_FW_CTRL = 0,
 DMUB_CMD__FAMS_DRR_UPDATE = 1,
 DMUB_CMD__HANDLE_SUBVP_CMD = 2, // specifically for SubVP cmd
+ /**
+ * For SubVP set manual trigger in FW because it
+ * triggers DRR_UPDATE_PENDING which SubVP relies
+ * on (for any SubVP cases that use a DRR display)
+ */
+ DMUB_CMD__FAMS_SET_MANUAL_TRIGGER = 3,
 };

 /**

From 4074f96d0f2db2fcbd8e223aebe60ca445b8aeae Mon Sep 17 00:00:00 2001
From: Chris Park
Date: Tue, 28 Jun 2022 12:36:04 -0400
Subject: [PATCH 125/134] drm/amd/display: Cache cursor when cursor exceeds 64x64

[Why]
When displaying a static screen from MALL, the cursor needs to be cached if it exceeds 64x64.

[How]
Program the bit that caches the cursor in MALL when the cursor size exceeds 64x64.
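The 64x64 threshold is just the capacity of the local cursor buffers: 64 x 64 pixels at 4 bytes per pixel is 16384 bytes, the same constant the hunk below compares against (and that the SW-cursor fallback earlier in this series uses). A small illustrative check, not driver code — in the driver the comparison reads hubp->curs_attr.width and .height:

#include <stdbool.h>
#include <stdio.h>

#define CURSOR_LOCAL_BYTES (64 * 64 * 4) /* 16384 */

static bool cursor_needs_mall(unsigned int width, unsigned int height)
{
	/* 4 bytes per pixel, matching the driver's worst-case assumption */
	return width * height * 4 > CURSOR_LOCAL_BYTES;
}

int main(void)
{
	printf("64x64:   %s\n",
	       cursor_needs_mall(64, 64) ? "cache in MALL" : "fits locally");
	printf("128x128: %s\n",
	       cursor_needs_mall(128, 128) ? "cache in MALL" : "fits locally");
	return 0;
}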
Reviewed-by: Jun Lei Acked-by: Alan Liu Acked-by: Alex Hung Signed-off-by: Chris Park Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c | 4 ++-- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h | 2 +- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 9 +++++++-- drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c index 3176b04a77400..6ec1c52535b9b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.c @@ -47,13 +47,13 @@ void hubp32_update_force_pstate_disallow(struct hubp *hubp, bool pstate_disallow DATA_UCLK_PSTATE_FORCE_VALUE, 0); } -void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel) +void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); // Also cache cursor in MALL if using MALL for SS REG_UPDATE_2(DCHUBP_MALL_CONFIG, USE_MALL_SEL, mall_sel, - USE_MALL_FOR_CURSOR, mall_sel == 2 ? 1 : 0); + USE_MALL_FOR_CURSOR, c_cursor); } void hubp32_prepare_subvp_buffering(struct hubp *hubp, bool enable) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h index c4315d50fbb06..56ef711515365 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubp.h @@ -52,7 +52,7 @@ void hubp32_update_force_pstate_disallow(struct hubp *hubp, bool pstate_disallow); -void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel); +void hubp32_update_mall_sel(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); void hubp32_prepare_subvp_buffering(struct hubp *hubp, bool enable); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index bf9ac9dfc7ddb..1f845e9ac4069 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -670,18 +670,23 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context) { int i; unsigned int num_ways = dcn32_calculate_cab_allocation(dc, context); + bool cache_cursor = false; for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; struct hubp *hubp = pipe->plane_res.hubp; if (pipe->stream && pipe->plane_state && hubp && hubp->funcs->hubp_update_mall_sel) { + if (hubp->curs_attr.width * hubp->curs_attr.height * 4 > 16384) + cache_cursor = true; + if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - hubp->funcs->hubp_update_mall_sel(hubp, 1); + hubp->funcs->hubp_update_mall_sel(hubp, 1, false); } else { hubp->funcs->hubp_update_mall_sel(hubp, num_ways <= dc->caps.cache_num_ways && - pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 2 : 0); + pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 
2 : 0, + cache_cursor); } } } diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 906818e792dd1..44c4578193a34 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -198,7 +198,7 @@ struct hubp_funcs { void (*hubp_soft_reset)(struct hubp *hubp, bool reset); void (*hubp_update_force_pstate_disallow)(struct hubp *hubp, bool allow); - void (*hubp_update_mall_sel)(struct hubp *hubp, uint32_t mall_sel); + void (*hubp_update_mall_sel)(struct hubp *hubp, uint32_t mall_sel, bool c_cursor); void (*hubp_prepare_subvp_buffering)(struct hubp *hubp, bool enable); void (*hubp_set_flip_int)(struct hubp *hubp); From e216431b63aef8b7d9cf6e59aea39582d48b1808 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 1 Apr 2022 15:29:21 -0400 Subject: [PATCH 126/134] drm/amd/display: Add dc_ctx to link_enc_create() parameters [Why&How] Preparation to enable run time initialization of register offsets to add dc_context to the link_enc_create callback. This is needed to get the dc_ctx handle where register offset initialization routine is called. Reviewed-by: Rodrigo Siqueira Acked-by: Alan Liu Acked-by: Alex Hung Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +- drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h | 1 + drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 4 +++- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 4 +++- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 1 + drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 1 + drivers/gpu/drm/amd/display/dc/inc/core_types.h | 1 + 23 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index ef54b96affa8a..9e51338441d07 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1703,7 +1703,7 @@ static bool dc_link_construct_legacy(struct dc_link *link, enc_init_data.transmitter = translate_encoder_to_transmitter(enc_init_data.encoder); link->link_enc = - link->dc->res_pool->funcs->link_enc_create(&enc_init_data); + link->dc->res_pool->funcs->link_enc_create(dc_ctx, &enc_init_data); if (!link->link_enc) { DC_ERROR("Failed to create link encoder!\n"); diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 
9ad8ad4550d97..54805802cbd5a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -612,6 +612,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce100_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 41804059550f0..f808315b28355 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -660,6 +660,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce110_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 62da6bc3094d7..e179e80667d1c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -618,6 +618,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce112_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index c4353a03b48ab..1b70b78e2fa15 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -697,6 +697,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce120_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c index dcfa0a3efa00d..fc6aa098bda06 100644 --- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c @@ -710,6 +710,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce60_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 0c3695e79652a..b28025960050c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -713,6 +713,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dce80_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dce110_link_encoder *enc110 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index e75be799012e1..174eebbe8b4fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -740,6 +740,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder 
*dcn10_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn10_link_encoder *enc10 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 1483de85a524e..621767e994bd7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -925,6 +925,7 @@ static const struct encoder_feature_support link_enc_feature = { }; struct link_encoder *dcn20_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h index 7cbe1e9daa36e..da0241e8c2552 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h @@ -50,6 +50,7 @@ struct resource_pool *dcn20_create_resource_pool( struct dc *dc); struct link_encoder *dcn20_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data); unsigned int dcn20_calc_max_scaled_time( diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c index e549a79f3fe1a..407d995bfa99b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c @@ -788,6 +788,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn201_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index d95875952fbaf..7cb35bb1c0f15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1325,6 +1325,7 @@ static int map_transmitter_id_to_phy_instance( } static struct link_encoder *dcn21_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn21_link_encoder *enc21 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 8bdf3573610fb..64320e0ca4463 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -927,6 +927,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn30_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index a5df74110284c..db172677d6134 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -890,6 +890,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn301_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index f537888f4fa65..4fab537e822ff 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -891,7 +891,9 @@ static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { hpd_regs(4) }; -static struct link_encoder *dcn302_link_encoder_create(const struct encoder_init_data *enc_init_data) +static struct link_encoder *dcn302_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 76f863eb86ef2..0a67f8a5656de 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -819,7 +819,9 @@ static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = { hpd_regs(1) }; -static struct link_encoder *dcn303_link_encoder_create(const struct encoder_init_data *enc_init_data) +static struct link_encoder *dcn303_link_encoder_create( + struct dc_context *ctx, + const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = kzalloc(sizeof(struct dcn20_link_encoder), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 6d25fcf865bfc..468a893ff7854 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1093,6 +1093,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index b5672fb902364..63861cdfb09f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -1262,6 +1262,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c index 1a5f5977f962b..7463b12ae4a30 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c @@ -1089,6 +1089,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c index 53dea466348fc..d56a212e065cc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c @@ -1088,6 +1088,7 @@ static const struct encoder_feature_support link_enc_feature = { }; static struct link_encoder *dcn31_link_encoder_create( + struct dc_context *ctx, const struct encoder_init_data *enc_init_data) { struct dcn20_link_encoder *enc20 = diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 314dec5712b56..bb1d880eee1ea 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1117,6 +1117,7 @@ static const struct encoder_feature_support link_enc_feature = {
 };
 
 static struct link_encoder *dcn32_link_encoder_create(
+	struct dc_context *ctx,
 	const struct encoder_init_data *enc_init_data)
 {
 	struct dcn20_link_encoder *enc20 =
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index 9ac0fcf79bedf..73d2a653a2791 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -1119,6 +1119,7 @@ static const struct encoder_feature_support link_enc_feature = {
 };
 
 static struct link_encoder *dcn321_link_encoder_create(
+	struct dc_context *ctx,
 	const struct encoder_init_data *enc_init_data)
 {
 	struct dcn20_link_encoder *enc20 =
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index e4b4102b15380..b3d0a4ea24463 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -96,6 +96,7 @@ struct resource_funcs {
 	struct panel_cntl*(*panel_cntl_create)(
 		const struct panel_cntl_init_data *panel_cntl_init_data);
 	struct link_encoder *(*link_enc_create)(
+			struct dc_context *ctx,
 			const struct encoder_init_data *init);
 	/* Create a minimal link encoder object with no dc_link object
 	 * associated with it. */

From 6290ba4c1656ce2374996b2ed11c3997ebbc4af5 Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Tue, 19 Apr 2022 11:22:17 -0400
Subject: [PATCH 127/134] drm/amd/display: Add reinstate dram in the FPO logic

To handle FPO correctly, we need to reinstate the DRAM values. This
change adds the required code to handle the vblank stretch and the DRAM
latency calculation.
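In outline: when a natural p-state switch is unsupported, the code below
falls back to the FW-based vblank stretch and searches the dummy-pstate
table for the latency entry matching the current DRAM speed. The search
loop is small enough to show standalone (a runnable sketch; the table
values are illustrative, not real hardware numbers):

#include <stdio.h>

struct dummy_pstate_entry {
	unsigned int dram_speed_mts;    /* memory speed for this entry */
	double dummy_pstate_latency_us; /* latency to program for it */
};

int main(void)
{
	/* Illustrative table only; real values come from the clock manager. */
	struct dummy_pstate_entry dummy_pstate_table[4] = {
		{ 1600, 38.0 }, { 8000, 9.0 }, { 10000, 8.0 }, { 16000, 5.0 },
	};
	unsigned int min_dram_speed_mts = 10000;
	unsigned int min_dram_speed_mts_margin = 160;
	int dummy_latency_index;

	/* Find the largest entry that is lower than the current DRAM speed
	 * (plus margin); anything lower than DPM0 still uses entry 0. */
	for (dummy_latency_index = 3; dummy_latency_index > 0; dummy_latency_index--)
		if (min_dram_speed_mts + min_dram_speed_mts_margin >
		    dummy_pstate_table[dummy_latency_index].dram_speed_mts)
			break;

	printf("dummy_latency_index = %d (latency %.1f us)\n",
	       dummy_latency_index,
	       dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us);
	return 0;
}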
Acked-by: Alex Hung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn30/dcn30_resource.h | 3 + .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 67 ++++++++++++++++--- 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h index 3330a1026fa5f..7d063c7d6a4bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.h @@ -99,6 +99,9 @@ enum dc_status dcn30_add_stream_to_ctx( void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +bool dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); void dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(struct dc *dc, struct dc_state *context); +int dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, int pipe_cnt, int vlevel); #endif /* _DCN30_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9175fe1f9be30..01e272f0bb05a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -30,6 +30,7 @@ #include "display_mode_vba_util_32.h" // We need this includes for WATERMARKS_* defines #include "clk_mgr/dcn32/dcn32_smu13_driver_if.h" +#include "dcn30/dcn30_resource.h" #define DC_LOGGER_INIT(logger) @@ -1597,6 +1598,10 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != dm_dram_clock_change_unsupported; + unsigned int dummy_latency_index = 0; + int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; + unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + unsigned int min_dram_speed_mts_margin; dc_assert_fp_enabled(); @@ -1606,6 +1611,32 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, pstate_en = true; } + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false; + + if (!pstate_en) { + /* only when the mclk switch can not be natural, is the fw based vblank stretch attempted */ + context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = + dcn30_can_support_mclk_switch_using_fw_based_vblank_stretch(dc, context); + + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + dummy_latency_index = dcn30_find_dummy_latency_index_for_fw_based_mclk_switch(dc, + context, pipes, pipe_cnt, vlevel); + + /* After calling dcn30_find_dummy_latency_index_for_fw_based_mclk_switch + * we reinstate the original dram_clock_change_latency_us on the context + * and all variables that may have changed up to this point, except the + * newly found dummy_latency_index + */ + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); + maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; + dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] != + 
dm_dram_clock_change_unsupported; + } + } + /* Set B: * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark @@ -1687,19 +1718,33 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - unsigned int min_dram_speed_mts_margin = 160; + min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; + min_dram_speed_mts_margin = 160; - if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == - dm_dram_clock_change_unsupported) - min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us; - /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ - for (i = 3; i > 0; i--) - if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) - break; + if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] == + dm_dram_clock_change_unsupported) { + int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1; + + min_dram_speed_mts = + dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; + } + + if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { + /* find largest table entry that is lower than dram speed, + * but lower than DPM0 still uses DPM0 + */ + for (dummy_latency_index = 3; dummy_latency_index > 0; dummy_latency_index--) + if (min_dram_speed_mts + min_dram_speed_mts_margin > + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dram_speed_mts) + break; + } + + context->bw_ctx.dml.soc.dram_clock_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us; context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; @@ -1770,6 +1815,8 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) + dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context); } static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, From 493af96d61b1361a44d016057e0fc6e5d4c92fc7 Mon Sep 17 00:00:00 2001 From: Alvin Lee Date: Thu, 19 May 2022 14:03:09 -0400 Subject: [PATCH 128/134] drm/amd/display: Update DCN32 and DCN321 SR latencies Update worst case SR latencies according to values measured by hardware team. 
Reviewed-by: Rodrigo Siqueira
Acked-by: Alex Hung
Signed-off-by: Alvin Lee
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c   | 4 ++--
 drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 01e272f0bb05a..66453546e24fe 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -121,8 +121,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
 		},
 	},
 	.num_states = 1,
-	.sr_exit_time_us = 5.20,
-	.sr_enter_plus_exit_time_us = 9.60,
+	.sr_exit_time_us = 20.16,
+	.sr_enter_plus_exit_time_us = 27.13,
 	.sr_exit_z8_time_us = 285.0,
 	.sr_enter_plus_exit_z8_time_us = 320,
 	.writeback_latency_us = 12.0,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
index 6e72336b79755..84b4b00f29cbd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -119,8 +119,8 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = {
 		},
 	},
 	.num_states = 1,
-	.sr_exit_time_us = 5.20,
-	.sr_enter_plus_exit_time_us = 9.60,
+	.sr_exit_time_us = 12.36,
+	.sr_enter_plus_exit_time_us = 16.72,
 	.sr_exit_z8_time_us = 285.0,
 	.sr_enter_plus_exit_z8_time_us = 320,
 	.writeback_latency_us = 12.0,

From ffccfdbab1056954fd400d2864a1c1b38915c143 Mon Sep 17 00:00:00 2001
From: Samson Tam
Date: Thu, 26 May 2022 21:12:23 -0400
Subject: [PATCH 129/134] drm/amd/display: Add missing ODM 2:1 policy logic

Phantom pipes must use the same configuration as their main pipes. This
commit adds that check.
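To illustrate the rule being enforced in dcn32_calculate_dccg_k1_k2_values()
below: a SubVP phantom pipe takes its two-pixels-per-container decision from
the paired main stream's timing rather than its own. A simplified sketch
with stub types (the real code uses dc_stream_state and
optc2_is_two_pixels_per_containter()):

#include <stdbool.h>

enum mall_stream_type { SUBVP_NONE, SUBVP_MAIN, SUBVP_PHANTOM };

struct stream {
	enum mall_stream_type type;
	struct stream *paired_stream; /* main stream for a phantom pipe */
	bool timing_two_pix_per_container;
};

/* Phantom pipes must mirror the programming of their paired main pipe,
 * so the decision is made on the paired stream's timing. */
static bool two_pix_per_container(const struct stream *stream)
{
	if (stream->type == SUBVP_PHANTOM)
		stream = stream->paired_stream;
	return stream->timing_two_pix_per_container;
}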
Reviewed-by: Rodrigo Siqueira
Acked-by: Alex Hung
Signed-off-by: Samson Tam
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 1 -
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c    | 7 ++++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 621767e994bd7..8224b9bf01d1b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1269,7 +1269,6 @@ static void get_pixel_clock_parameters(
 		pixel_clk_params->requested_pix_clk_100hz /= 4;
 	else if (optc2_is_two_pixels_per_containter(&stream->timing) || opp_cnt == 2)
 		pixel_clk_params->requested_pix_clk_100hz /= 2;
-
 	else if (hws->funcs.is_dp_dig_pixel_rate_div_policy) {
 		if (hws->funcs.is_dp_dig_pixel_rate_div_policy(pipe_ctx))
 			pixel_clk_params->requested_pix_clk_100hz /= 2;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index 1f845e9ac4069..be2e3b9e971eb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -1084,8 +1084,13 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	unsigned int odm_combine_factor = 0;
 	struct dc *dc = pipe_ctx->stream->ctx->dc;
-	bool two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
+	bool two_pix_per_container = false;
 
+	// For phantom pipes, use the same programming as the main pipes
+	if (pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		stream = pipe_ctx->stream->mall_stream_config.paired_stream;
+	}
+	two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
 	odm_combine_factor = get_odm_config(pipe_ctx, NULL);
 
 	if (is_dp_128b_132b_signal(pipe_ctx)) {

From 3facca7489be2d42c6256350dae72c342a47d7e5 Mon Sep 17 00:00:00 2001
From: Alvin Lee
Date: Thu, 2 Jun 2022 16:01:33 -0400
Subject: [PATCH 130/134] drm/amd/display: Disable MPC split for DCN32/321

Due to the CRB, there is no need to rely on MPC splitting to maximize
use of the DET anymore.
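For context, pipe_split_policy is a simple policy knob; paraphrasing the
upstream definition in dc.h (ordering and comments here are mine):

enum pipe_split_policy {
	MPC_SPLIT_DYNAMIC,        /* split a plane across pipes whenever it helps */
	MPC_SPLIT_AVOID,          /* never split; DCN32/321 lean on the CRB/DET instead */
	MPC_SPLIT_AVOID_MULT_DISP /* avoid splitting only in multi-display configs */
};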
Reviewed-by: Rodrigo Siqueira
Acked-by: Alex Hung
Signed-off-by: Alvin Lee
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index bb1d880eee1ea..2278181ad3d60 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -843,7 +843,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.timing_trace = false,
 	.clock_trace = true,
 	.disable_pplib_clock_request = false,
-	.pipe_split_policy = MPC_SPLIT_DYNAMIC,
+	.pipe_split_policy = MPC_SPLIT_AVOID, // Due to CRB, no need to MPC split anymore
 	.force_single_disp_pipe_split = false,
 	.disable_dcc = DCC_ENABLE,
 	.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index 73d2a653a2791..ea5160d0c92e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -844,7 +844,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.timing_trace = false,
 	.clock_trace = true,
 	.disable_pplib_clock_request = false,
-	.pipe_split_policy = MPC_SPLIT_DYNAMIC,
+	.pipe_split_policy = MPC_SPLIT_AVOID,
 	.force_single_disp_pipe_split = false,
 	.disable_dcc = DCC_ENABLE,
 	.vsr_support = true,

From b1babe8623f91a71a5acc34eb323c718e0413b74 Mon Sep 17 00:00:00 2001
From: Alvin Lee
Date: Fri, 3 Jun 2022 16:39:48 -0400
Subject: [PATCH 131/134] drm/amd/display: Add debug option for idle optimizations on cursor updates

For optimization and debugging purposes, add an option to exit idle
optimizations on cursor updates.
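The hunks below only set the new flag in the debug defaults; a consumer
would look roughly like this (hypothetical sketch of a call site, not code
from this series; dc_allow_idle_optimizations() is the existing dc interface
for entering and leaving idle optimizations):

	/* Before programming a cursor update, drop out of idle
	 * optimizations when the debug option asks for it. */
	if (dc->debug.exit_idle_opt_for_cursor_updates)
		dc_allow_idle_optimizations(dc, false);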
Acked-by: Alex Hung
Signed-off-by: Alvin Lee
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c   | 1 +
 drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 2278181ad3d60..9a26d24b579f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -868,6 +868,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 	},
 	.use_max_lb = true,
 	.force_disable_subvp = true,
+	.exit_idle_opt_for_cursor_updates = true,
 	.enable_single_display_2to1_odm_policy = true,
 	.enable_dp_dig_pixel_rate_div_policy = 1,
 };
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index ea5160d0c92e9..8157e40d2c7ef 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -869,6 +869,7 @@ static const struct dc_debug_options debug_defaults_drv = {
 	},
 	.use_max_lb = true,
 	.force_disable_subvp = true,
+	.exit_idle_opt_for_cursor_updates = true,
 	.enable_single_display_2to1_odm_policy = true,
 	.enable_dp_dig_pixel_rate_div_policy = 1,
 };

From c13423c63f73ee2f9807fdb2a3a7e647421a8114 Mon Sep 17 00:00:00 2001
From: Leo Li
Date: Thu, 4 Nov 2021 19:41:55 -0400
Subject: [PATCH 132/134] drm/amd/display: Copy crc_skip_count when duplicating CRTC state

[Why]
crc_skip_count tracks how many frames to skip so the OTG CRC engine can
"warm up" before it outputs correct CRC values. Experimentally, this
seems to be 2 frames.

When duplicating CRTC states, this value was not copied to the
duplicated state. Therefore, when that state is committed, we needlessly
wait 2 frames before outputting CRC values, even if the CRC engine is
already warmed up.

[How]
Copy the crc_skip_count as part of dm_crtc_duplicate_state.
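The underlying failure mode is generic to zero-allocating duplicate-state
helpers: any field not explicitly copied starts at zero in the new state.
A minimal, self-contained sketch of the pattern (stub type; the real code
lives in dm_crtc_duplicate_state()):

#include <stdlib.h>

struct dm_crtc_state_stub {
	int crc_skip_count; /* frames left before CRC output is trusted */
	/* ... other driver-private fields ... */
};

static struct dm_crtc_state_stub *
duplicate_state(const struct dm_crtc_state_stub *cur)
{
	/* calloc zeroes every field, so anything stateful must be carried
	 * over by hand or it silently restarts on every commit. */
	struct dm_crtc_state_stub *state = calloc(1, sizeof(*state));

	if (!state)
		return NULL;
	state->crc_skip_count = cur->crc_skip_count; /* the fix */
	return state;
}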
Acked-by: Alex Hung
Signed-off-by: Leo Li
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 828189cb34417..594fe8a4d02b0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -243,6 +243,7 @@ static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc)
 	state->freesync_config = cur->freesync_config;
 	state->cm_has_degamma = cur->cm_has_degamma;
 	state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
+	state->crc_skip_count = cur->crc_skip_count;
 	state->mpo_requested = cur->mpo_requested;
 
 	/* TODO Duplicate dc_stream after objects are stream object is flattened */

From d58715704c5c7d82d7194540780fb335ab337da0 Mon Sep 17 00:00:00 2001
From: Anthony Koo
Date: Sun, 17 Jul 2022 11:41:44 -0400
Subject: [PATCH 133/134] drm/amd/display: 3.2.196

This version brings along the following fixes:
- Copy crc_skip_count when duplicating CRTC state
- Add debug option for idle optimizations on cursor updates
- Disable MPC split for DCN32/321
- Add missing ODM 2:1 policy logic
- Update DCN32 and DCN321 SR latencies
- Add reinstate dram in the FPO logic
- Add dc_ctx to link_enc_create() parameters
- Cache cursor when cursor exceeds 64x64
- Add support for manual DMUB FAMS trigger
- Fix dpstreamclk programming
- Add missing AUDIO_DTO_SEL reg field
- Add OTG/ODM functions
- Use correct clock source constructor for DCN314
- Use correct DTO_SRC_SEL for 128b/132b encoding
- Add pixel rate div calcs and programming
- Remove FPU flags from DCN30 Makefile
- Create patch bounding box function for isolate FPU
- Move mclk calculation function to DML
- Remove FPU operations from dcn201 resources
- Fallback to SW cursor if SubVP + cursor too big
- Drop unnecessary FPU flags on dcn302 files
- Reboot while unplug hdcp enabled dp from mst hub
- Reset pipe count when iterating for DET override
- Calculate MALL cache lines based on Mblks required
- Fix two MPO videos in single display ODM combine mode
- Guard against zero memory channels
- Updates SubVP + SubVP DRR cases updates
- Fix OPTC function pointers for DCN314
- Add enable/disable FIFO callbacks to stream setup
- Avoid MPC infinite loop

Acked-by: Alex Hung
Signed-off-by: Anthony Koo
Tested-by: Daniel Wheeler
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index d05bbe193bfa7..8e1e40083ec83 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.195"
+#define DC_VER "3.2.196"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6

From 1b54a0121dba12af268fb75c413feabdb9f573d4 Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira
Date: Fri, 22 Jul 2022 13:56:17 -0400
Subject: [PATCH 134/134] drm/amd/display: Reduce stack size in the mode support function

When we use the allmodconfig option, we see the following error:

drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c: In function 'dml32_ModeSupportAndSystemConfigurationFull':
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:3799:1: error: the frame size of 2464 bytes is larger
than 2048 bytes [-Werror=frame-larger-than=] 3799 | } // ModeSupportAndSystemConfigurationFull This commit fixes this issue by moving part of the mode support operation from ModeSupportAndSystemConfigurationFull to a dedicated function. Cc: Harry Wentland Cc: Alex Deucher Cc: Aurabindo Pillai Cc: Stephen Rothwell Tested-by: Stephen Rothwell Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../dc/dml/dcn32/display_mode_vba_32.c | 132 ++++++++++-------- 1 file changed, 70 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index db3e43499a26d..890612db08dc4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -1654,6 +1654,75 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #endif } +static void mode_support_configuration(struct vba_vars_st *v, + struct display_mode_lib *mode_lib) +{ + int i, j; + + for (i = v->soc.num_states - 1; i >= 0; i--) { + for (j = 0; j < 2; j++) { + if (mode_lib->vba.ScaleRatioAndTapsSupport == true + && mode_lib->vba.SourceFormatPixelAndScanSupport == true + && mode_lib->vba.ViewportSizeSupport[i][j] == true + && !mode_lib->vba.LinkRateDoesNotMatchDPVersion + && !mode_lib->vba.LinkRateForMultistreamNotIndicated + && !mode_lib->vba.BPPForMultistreamNotIndicated + && !mode_lib->vba.MultistreamWithHDMIOreDP + && !mode_lib->vba.ExceededMultistreamSlots[i] + && !mode_lib->vba.MSOOrODMSplitWithNonDPLink + && !mode_lib->vba.NotEnoughLanesForMSO + && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 + && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP + && !mode_lib->vba.DSC422NativeNotSupported + && !mode_lib->vba.MPCCombineMethodIncompatible + && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true + && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true + && mode_lib->vba.NotEnoughDSCUnits[i] == false + && !mode_lib->vba.NotEnoughDSCSlices[i] + && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe + && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen + && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false + && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] + && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false + && !mode_lib->vba.InvalidCombinationOfMALLUseForPState + && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified + && mode_lib->vba.ROBSupport[i][j] == true + && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true + && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true + && mode_lib->vba.NumberOfOTGSupport == true + && mode_lib->vba.NumberOfHDMIFRLSupport == true + && mode_lib->vba.EnoughWritebackUnits == true + && mode_lib->vba.WritebackLatencySupport == true + && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true + && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true + && mode_lib->vba.ViewportExceedsSurface == false + && mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.VActiveBandwithSupport[i][j] == true + && mode_lib->vba.DynamicMetadataSupported[i][j] == true + && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true + && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true + && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true + && 
mode_lib->vba.NonsupportedDSCInputBPC == false + && !mode_lib->vba.ExceededMALLSize + && ((mode_lib->vba.HostVMEnable == false + && !mode_lib->vba.ImmediateFlipRequiredFinal) + || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) + && (!mode_lib->vba.DRAMClockChangeRequirementFinal + || i == v->soc.num_states - 1 + || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported) + && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1 + || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported) + && (!mode_lib->vba.USRRetrainingRequiredFinal + || mode_lib->vba.USRRetrainingSupport[i][j])) { + mode_lib->vba.ModeSupport[i][j] = true; + } else { + mode_lib->vba.ModeSupport[i][j] = false; + } + } + } +} + void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { struct vba_vars_st *v = &mode_lib->vba; @@ -3632,68 +3701,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /*Mode Support, Voltage State and SOC Configuration*/ - for (i = v->soc.num_states - 1; i >= 0; i--) { - for (j = 0; j < 2; j++) { - if (mode_lib->vba.ScaleRatioAndTapsSupport == true - && mode_lib->vba.SourceFormatPixelAndScanSupport == true - && mode_lib->vba.ViewportSizeSupport[i][j] == true - && !mode_lib->vba.LinkRateDoesNotMatchDPVersion - && !mode_lib->vba.LinkRateForMultistreamNotIndicated - && !mode_lib->vba.BPPForMultistreamNotIndicated - && !mode_lib->vba.MultistreamWithHDMIOreDP - && !mode_lib->vba.ExceededMultistreamSlots[i] - && !mode_lib->vba.MSOOrODMSplitWithNonDPLink - && !mode_lib->vba.NotEnoughLanesForMSO - && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 - && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP - && !mode_lib->vba.DSC422NativeNotSupported - && !mode_lib->vba.MPCCombineMethodIncompatible - && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true - && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true - && mode_lib->vba.NotEnoughDSCUnits[i] == false - && !mode_lib->vba.NotEnoughDSCSlices[i] - && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe - && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen - && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false - && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] - && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false - && !mode_lib->vba.InvalidCombinationOfMALLUseForPState - && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified - && mode_lib->vba.ROBSupport[i][j] == true - && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true - && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true - && mode_lib->vba.NumberOfOTGSupport == true - && mode_lib->vba.NumberOfHDMIFRLSupport == true - && mode_lib->vba.EnoughWritebackUnits == true - && mode_lib->vba.WritebackLatencySupport == true - && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true - && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true - && mode_lib->vba.ViewportExceedsSurface == false - && mode_lib->vba.PrefetchSupported[i][j] == true - && mode_lib->vba.VActiveBandwithSupport[i][j] == true - && mode_lib->vba.DynamicMetadataSupported[i][j] == true - && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true - && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true - && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true - && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true - && mode_lib->vba.NonsupportedDSCInputBPC == false - && 
!mode_lib->vba.ExceededMALLSize - && ((mode_lib->vba.HostVMEnable == false - && !mode_lib->vba.ImmediateFlipRequiredFinal) - || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) - && (!mode_lib->vba.DRAMClockChangeRequirementFinal - || i == v->soc.num_states - 1 - || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported) - && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1 - || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported) - && (!mode_lib->vba.USRRetrainingRequiredFinal - || mode_lib->vba.USRRetrainingSupport[i][j])) { - mode_lib->vba.ModeSupport[i][j] = true; - } else { - mode_lib->vba.ModeSupport[i][j] = false; - } - } - } + mode_support_configuration(v, mode_lib); MaximumMPCCombine = 0;