diff --git a/Documentation/gpu/amdgpu/display/dc-glossary.rst b/Documentation/gpu/amdgpu/display/dc-glossary.rst
index 116f5f0942fd7..0b0ffd428dd2e 100644
--- a/Documentation/gpu/amdgpu/display/dc-glossary.rst
+++ b/Documentation/gpu/amdgpu/display/dc-glossary.rst
@@ -170,7 +170,7 @@ consider asking in the amdgfx and update this page.
 MC
   Memory Controller
 
-MPC
+MPC/MPCC
   Multiple pipes and plane combine
 
 MPO
diff --git a/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg b/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg
new file mode 100644
index 0000000000000..315ffc5a1a4b0
--- /dev/null
+++ b/Documentation/gpu/amdgpu/display/dcn2_cm_drm_current.svg
@@ -0,0 +1,1370 @@
[1370 lines of SVG markup not reproduced here: diagram of the DCN 2.0 color
pipeline. It maps DRM interface objects (drm_framebuffer format; drm_plane
color_encoding, color_range, pixel_blend_mode; drm_crtc Degamma, CTM, Gamma)
to DC interface fields (format, bias_and_scale, input_csc_color_matrix,
in_transfer_func, hdr_mult, gamut_remap_matrix, in_shaper_func, lut3d_func,
blend_tf on the Plane; gamut_remap_matrix, func_shaper, lut3d_func,
out_transfer_func, csc_color_matrix, bit_depth_param, clamping,
output_color_space on the Stream) and to DPP/MPC hw blocks (CNVC, Input CSC,
DeGamma RAM and ROM (sRGB, BT2020), HDR Multiply, Gamut Remap, Shaper LUT RAM,
3D LUT RAM, Blend Gamma, Blender, Gamma RAM, OCSC), with a legend
distinguishing Matrix, 1D LUT, 3D LUT, Unpacking and Other blocks.]
diff --git a/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg b/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg
new file mode 100644
index 0000000000000..7299ee9b6d649
--- /dev/null
+++ b/Documentation/gpu/amdgpu/display/dcn3_cm_drm_current.svg
@@ -0,0 +1,1529 @@
[1529 lines of SVG markup not reproduced here: the equivalent diagram for the
DCN 3.0 color pipeline. Compared to DCN 2.0, DeGamma is ROM-only (sRGB, BT2020,
Gamma 2.2, PQ, HLG) followed by a Post CSC and Gamma Correction in the DPP,
and the MPC side gains its own Gamut Remap, Shaper LUT RAM and 3D LUT RAM
ahead of the Gamma RAM and OCSC.]
diff --git a/Documentation/gpu/amdgpu/display/display-manager.rst b/Documentation/gpu/amdgpu/display/display-manager.rst
index 7ce31f89d9a09..b7abb18cfc820 100644
--- a/Documentation/gpu/amdgpu/display/display-manager.rst
+++ b/Documentation/gpu/amdgpu/display/display-manager.rst
@@ -40,3 +40,144 @@ Atomic Implementation
 .. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
    :functions: amdgpu_dm_atomic_check amdgpu_dm_atomic_commit_tail
+
+Color Management Properties
+===========================
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+   :doc: overview
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+   :internal:
+
+
+DC Color Capabilities between DCN generations
+---------------------------------------------
+
+The DRM/KMS framework defines three CRTC color correction properties: degamma,
+color transformation matrix (CTM) and gamma, plus two properties for the
+degamma and gamma LUT sizes. AMD DC programs some of the color correction
+features pre-blending, but DRM/KMS has no per-plane color correction
+properties.
+
+In general, the DRM CRTC color properties are programmed to DC as follows:
+CRTC gamma after blending, and CRTC degamma pre-blending. Although CTM is
+programmed after blending, it is mapped to DPP hw blocks (pre-blending). Other
+color caps available in the hw are not currently exposed by the DRM interface
+and are bypassed.
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/dc/dc.h
+   :doc: color-management-caps
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/dc/dc.h
+   :internal:
+
+The color pipeline has undergone major changes between DCN hardware
+generations. What's possible to do before and after blending depends on
+hardware capabilities, as illustrated below by the DCN 2.0 and DCN 3.0 family
+schemas.
+
+**DCN 2.0 family color caps and mapping**
+
+.. kernel-figure:: dcn2_cm_drm_current.svg
+
+**DCN 3.0 family color caps and mapping**
+
+.. kernel-figure:: dcn3_cm_drm_current.svg
+
+Blend Mode Properties
+=====================
+
+Pixel blend mode is a DRM plane composition property of :c:type:`drm_plane`
+used to describe how pixels from a foreground plane (fg) are composited with
+the background plane (bg). Here, we present the main concepts of DRM blend
+mode to help understand how this property is mapped to the AMD DC interface.
+See more about this DRM property and the alpha blending equations in
+:ref:`DRM Plane Composition Properties <plane_composition_properties>`.
+
+Basically, a blend mode sets the alpha blending equation for plane
+composition that fits the mode in which the alpha channel affects the state of
+pixel color values and, therefore, the resulting pixel color. For example,
+consider the following elements of the alpha blending equation:
+
+- *fg.rgb*: Each of the RGB component values from the foreground's pixel.
+- *fg.alpha*: Alpha component value from the foreground's pixel.
+- *bg.rgb*: Each of the RGB component values from the background.
+- *plane_alpha*: Plane alpha value set by the **plane "alpha" property**, see
+  more in :ref:`DRM Plane Composition Properties <plane_composition_properties>`.
+
+In the basic alpha blending equation::
+
+   out.rgb = alpha * fg.rgb + (1 - alpha) * bg.rgb
+
+the alpha channel value of each pixel in a plane is ignored and only the plane
+alpha affects the resulting pixel color values.
+
+DRM has three blend modes to define the blend formula in the plane
+composition:
+
+* **None**: Blend formula that ignores the pixel alpha.
+
+* **Pre-multiplied**: Blend formula that assumes the pixel color values in a
+  plane were already pre-multiplied by its own alpha channel before storage.
+
+* **Coverage**: Blend formula that assumes the pixel color values were not
+  pre-multiplied with the alpha channel values.
+
+Pre-multiplied is the default pixel blend mode; that means, when no blend mode
+property is created or defined, DRM considers the plane's pixels to have
+pre-multiplied color values. In IGT GPU tools, the kms_plane_alpha_blend test
+provides a set of subtests to verify plane alpha and blend mode properties.
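+
+To make the three formulas concrete, the sketch below (illustrative C, not
+driver code) evaluates one color channel per blend mode, assuming normalized
+[0, 1] float components and the *plane_alpha*/*fg.alpha* semantics above::
+
+    enum blend_mode { BLEND_NONE, BLEND_PREMULTI, BLEND_COVERAGE };
+
+    /* Returns one blended channel of out.rgb for the given mode. */
+    static float blend_channel(float fg_c, float fg_a, float bg_c,
+                               float plane_alpha, enum blend_mode mode)
+    {
+        switch (mode) {
+        case BLEND_NONE:
+            /* Pixel alpha ignored: only plane alpha weights fg vs bg. */
+            return plane_alpha * fg_c + (1.0f - plane_alpha) * bg_c;
+        case BLEND_PREMULTI:
+            /* fg_c already carries fg_a from storage. */
+            return plane_alpha * fg_c + (1.0f - plane_alpha * fg_a) * bg_c;
+        default:
+            /* Coverage: fg_c is multiplied by both alphas at blend time. */
+            return plane_alpha * fg_a * fg_c +
+                   (1.0f - plane_alpha * fg_a) * bg_c;
+        }
+    }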
+
+The DRM blend mode and its elements are then mapped by AMDGPU display manager
+(DM) to program the blending configuration of the Multiple Pipe/Plane Combined
+(MPC), as follows:
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+   :doc: mpc-overview
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+   :functions: mpcc_blnd_cfg
+
+Therefore, the blending configuration for a single MPCC instance on the MPC
+tree is defined by :c:type:`mpcc_blnd_cfg`, where
+:c:type:`pre_multiplied_alpha` is the alpha pre-multiplied mode flag used to
+set :c:type:`MPCC_ALPHA_MULTIPLIED_MODE`. It controls whether alpha is
+multiplied (true/false) and is only true for the DRM pre-multiplied blend
+mode. :c:type:`mpcc_alpha_blend_mode` defines the alpha blend mode regarding
+pixel alpha and plane alpha values. It sets one of the three modes for
+:c:type:`MPCC_ALPHA_BLND_MODE`, as described below.
+
+.. kernel-doc:: drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+   :functions: mpcc_alpha_blend_mode
+
+DM then maps the elements of `enum mpcc_alpha_blend_mode` to those in the DRM
+blend formula, as follows:
+
+* *MPC pixel alpha* matches *DRM fg.alpha* as the alpha component value
+  from the plane's pixel
+* *MPC global alpha* matches *DRM plane_alpha* when the pixel alpha should
+  be ignored and, therefore, pixel values are not pre-multiplied
+* *MPC global gain* assumes the *MPC global alpha* value when both *DRM
+  fg.alpha* and *DRM plane_alpha* participate in the blend equation
+
+In short, *fg.alpha* is ignored by selecting
+:c:type:`MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA`. On the other hand, the
+(plane_alpha * fg.alpha) component becomes available by selecting
+:c:type:`MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN`. Finally,
+:c:type:`MPCC_ALPHA_MULTIPLIED_MODE` defines whether the pixel color values
+are pre-multiplied by alpha or not.
+
+Blend configuration flow
+------------------------
+
+The alpha blending equation is configured from DRM to the DC interface by the
+following path (a simplified sketch of step 1 follows the list):
+
+1. When updating a :c:type:`drm_plane_state <drm_plane>`, DM calls
+   :c:type:`fill_blending_from_plane_state()` that maps
+   :c:type:`drm_plane_state <drm_plane>` attributes to the
+   :c:type:`dc_plane_info <dc_plane>` struct to be handled in the OS-agnostic
+   component (DC).
+
+2. On the DC interface, :c:type:`struct mpcc_blnd_cfg <mpcc_blnd_cfg>`
+   programs the MPCC blend configuration considering the
+   :c:type:`dc_plane_info <dc_plane>` input from DPP.
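+
+For illustration only, the helper below sketches what such a state-to-flags
+mapping can look like. This is a simplified assumption, not the driver code:
+the real :c:type:`fill_blending_from_plane_state()` also checks the plane type
+and matches the framebuffer format against an explicit list of alpha-capable
+formats::
+
+    /* Simplified sketch: derive MPC-facing blend flags from DRM plane
+     * state. Assumes <drm/drm_plane.h> and <drm/drm_blend.h>.
+     */
+    static void fill_blending_sketch(const struct drm_plane_state *state,
+                                     bool *per_pixel_alpha,
+                                     bool *pre_multiplied_alpha,
+                                     bool *global_alpha,
+                                     int *global_alpha_value)
+    {
+        *per_pixel_alpha = false;      /* feeds MPCC per-pixel alpha */
+        *pre_multiplied_alpha = true;  /* feeds MPCC_ALPHA_MULTIPLIED_MODE */
+        *global_alpha = false;         /* feeds MPCC global alpha/gain */
+        *global_alpha_value = 0xff;
+
+        /* Pixel alpha only participates for formats that carry alpha. */
+        if (state->fb && state->fb->format->has_alpha &&
+            (state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ||
+             state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE))
+            *per_pixel_alpha = true;
+
+        /* Coverage means fg.rgb was not pre-multiplied by fg.alpha. */
+        if (*per_pixel_alpha &&
+            state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE)
+            *pre_multiplied_alpha = false;
+
+        /* The plane "alpha" property is 16 bits; MPC takes 8-bit gain. */
+        if (state->alpha < 0xffff) {
+            *global_alpha = true;
+            *global_alpha_value = state->alpha >> 8;
+        }
+    }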
diff --git a/Documentation/gpu/drm-kms.rst b/Documentation/gpu/drm-kms.rst
index 6f9c064fd3234..b4377a545425b 100644
--- a/Documentation/gpu/drm-kms.rst
+++ b/Documentation/gpu/drm-kms.rst
@@ -532,6 +532,8 @@ Standard Plane Properties
 .. kernel-doc:: drivers/gpu/drm/drm_plane.c
    :doc: standard plane properties
 
+.. _plane_composition_properties:
+
 Plane Composition Properties
 ----------------------------
 
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 5f2470c34334a..782bc7d05f3c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -79,7 +79,7 @@ amdgpu-y += \
 	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
 	vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
 	nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
-	nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o
+	sienna_cichlid.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o
 
 # add DF block
 amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2c4bdc0530cc8..1372e2b475418 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -275,6 +275,9 @@ extern int amdgpu_vcnfw_log;
 #define AMDGPU_RESET_VCE			(1 << 13)
 #define AMDGPU_RESET_VCE1			(1 << 14)
 
+#define AMDGPU_RESET_LEVEL_SOFT_RECOVERY (1 << 0)
+#define AMDGPU_RESET_LEVEL_MODE2 (1 << 1)
+
 /* max cursor sizes (in pixels) */
 #define CIK_CURSOR_WIDTH 128
 #define CIK_CURSOR_HEIGHT 128
@@ -1095,6 +1098,9 @@ struct amdgpu_device {
 	uint32_t			scpm_status;
 
 	struct work_struct		reset_work;
+
+	uint32_t			amdgpu_reset_level_mask;
+	bool				job_hang;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 28f27df5917e3..7a93858744ac7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -136,6 +136,7 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
 	reset_context.method = AMD_RESET_METHOD_NONE;
 	reset_context.reset_req_dev = adev;
 	clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+	clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags);
 
 	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 15e1893e0b74d..27670770b384e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1752,7 +1752,7 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
 	char reg_offset[11];
-	uint32_t *new, *tmp = NULL;
+	uint32_t *new = NULL, *tmp = NULL;
 	int ret, i = 0, len = 0;
 
 	do {
@@ -1799,7 +1799,8 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
 	ret = size;
 
 error_free:
-	kfree(tmp);
+	if (tmp != new)
+		kfree(tmp);
 	kfree(new);
 	return ret;
 }
@@ -1839,6 +1840,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
 	}
 #endif
 
+	debugfs_create_u32("amdgpu_reset_level", 0600, root, &adev->amdgpu_reset_level_mask);
+
 	/* Register debugfs entries for amdgpu_ttm */
 	amdgpu_ttm_debugfs_init(adev);
 	amdgpu_debugfs_pm_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 75c43bf9dc4e3..ebb722811dcf7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5097,6 +5097,7 @@ static void amdgpu_device_recheck_guilty_jobs(
 
 	/* set guilty */
 	drm_sched_increase_karma(s_job);
+	amdgpu_reset_prepare_hwcontext(adev, reset_context);
 retry:
 	/* do hw reset */
 	if (amdgpu_sriov_vf(adev)) {
@@ 
-5206,6 +5207,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, reset_context->job = job; reset_context->hive = hive; + /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list @@ -5329,8 +5331,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_ras_resume(adev); } else { r = amdgpu_do_asic_reset(device_list_handle, reset_context); - if (r && r == -EAGAIN) + if (r && r == -EAGAIN) { + set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags); + adev->asic_reset_res = 0; goto retry; + } } skip_hw_reset: @@ -5760,6 +5765,7 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) reset_context.reset_req_dev = adev; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + set_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); adev->no_hw_access = true; r = amdgpu_device_pre_asic_reset(adev, &reset_context); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 222d3d7ea0767..454a78ba60d43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -477,7 +477,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev) kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i], RESET_QUEUES, 0, 0); - if (adev->gfx.kiq.ring.sched.ready) + if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&adev->gfx.kiq.ring_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index beabab515836a..c7b44aeb671b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -35,6 +35,9 @@ struct amdgpu_gfxhub_funcs { void (*init)(struct amdgpu_device *adev); int (*get_xgmi_info)(struct amdgpu_device *adev); void (*utcl2_harvest)(struct amdgpu_device *adev); + void (*mode2_save_regs)(struct amdgpu_device *adev); + void (*mode2_restore_regs)(struct amdgpu_device *adev); + void (*halt)(struct amdgpu_device *adev); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 008eaca271519..0305b660cd17d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -264,6 +264,32 @@ struct amdgpu_gmc { u64 mall_size; /* number of UMC instances */ int num_umc; + /* mode2 save restore */ + u64 VM_L2_CNTL; + u64 VM_L2_CNTL2; + u64 VM_DUMMY_PAGE_FAULT_CNTL; + u64 VM_DUMMY_PAGE_FAULT_ADDR_LO32; + u64 VM_DUMMY_PAGE_FAULT_ADDR_HI32; + u64 VM_L2_PROTECTION_FAULT_CNTL; + u64 VM_L2_PROTECTION_FAULT_CNTL2; + u64 VM_L2_PROTECTION_FAULT_MM_CNTL3; + u64 VM_L2_PROTECTION_FAULT_MM_CNTL4; + u64 VM_L2_PROTECTION_FAULT_ADDR_LO32; + u64 VM_L2_PROTECTION_FAULT_ADDR_HI32; + u64 VM_DEBUG; + u64 VM_L2_MM_GROUP_RT_CLASSES; + u64 VM_L2_BANK_SELECT_RESERVED_CID; + u64 VM_L2_BANK_SELECT_RESERVED_CID2; + u64 VM_L2_CACHE_PARITY_CNTL; + u64 VM_L2_IH_LOG_CNTL; + u64 VM_CONTEXT_CNTL[16]; + u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[16]; + u64 VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[16]; + u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[16]; + u64 VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[16]; + u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[16]; + u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[16]; + u64 MC_VM_MX_L1_TLB_CNTL; }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 2b155ad637d28..723721bdd6bf9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -49,6 +49,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } memset(&ti, 0, sizeof(struct amdgpu_task_info)); + adev->job_hang = true; if (amdgpu_gpu_recovery && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { @@ -71,6 +72,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context); if (r) @@ -82,6 +84,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } exit: + adev->job_hang = false; drm_dev_exit(idx); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4e4460411ec50..c2d3d3e456f1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1953,6 +1953,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 32c86a0b145ca..9da5ead50c900 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -23,6 +23,7 @@ #include "amdgpu_reset.h" #include "aldebaran.h" +#include "sienna_cichlid.h" int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_handler *handler) @@ -36,10 +37,15 @@ int amdgpu_reset_init(struct amdgpu_device *adev) { int ret = 0; + adev->amdgpu_reset_level_mask = 0x1; + switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 2): ret = aldebaran_reset_init(adev); break; + case IP_VERSION(11, 0, 7): + ret = sienna_cichlid_reset_init(adev); + break; default: break; } @@ -55,6 +61,9 @@ int amdgpu_reset_fini(struct amdgpu_device *adev) case IP_VERSION(13, 0, 2): ret = aldebaran_reset_fini(adev); break; + case IP_VERSION(11, 0, 7): + ret = sienna_cichlid_reset_fini(adev); + break; default: break; } @@ -67,6 +76,12 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev, { struct amdgpu_reset_handler *reset_handler = NULL; + if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2)) + return -ENOSYS; + + if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags)) + return -ENOSYS; + if (adev->reset_cntl && adev->reset_cntl->get_reset_handler) reset_handler = adev->reset_cntl->get_reset_handler( adev->reset_cntl, reset_context); @@ -83,6 +98,12 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev, int ret; struct amdgpu_reset_handler *reset_handler = NULL; + if (!(adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_MODE2)) + return -ENOSYS; + + if (test_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context->flags)) + return -ENOSYS; + if (adev->reset_cntl) reset_handler = adev->reset_cntl->get_reset_handler( adev->reset_cntl, reset_context); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index ffda1560c6481..f71b83c425908 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -30,6 +30,7 @@ enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, AMDGPU_SKIP_HW_RESET = 1, + AMDGPU_SKIP_MODE2_RESET = 2, }; struct amdgpu_reset_context { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d3558c34d406c..3e316b013fd95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -405,6 +405,9 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { ktime_t deadline = ktime_add_us(ktime_get(), 10000); + if (!(ring->adev->amdgpu_reset_level_mask & AMDGPU_RESET_LEVEL_SOFT_RECOVERY)) + return false; + if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0daf112debeb7..6609b9177fc08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -714,6 +714,8 @@ struct amdgpu_ttm_tt { #endif /* HAVE_AMDKCL_HMM_MIRROR_ENABLED */ }; +#define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) + #ifdef CONFIG_DRM_AMDGPU_USERPTR #ifdef HAVE_AMDKCL_HMM_MIRROR_ENABLED /* @@ -726,7 +728,7 @@ struct amdgpu_ttm_tt { int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { struct ttm_tt *ttm = bo->tbo.ttm; - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); unsigned long start = gtt->userptr; struct vm_area_struct *vma; struct mm_struct *mm; @@ -780,7 +782,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) */ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); bool r = false; if (!gtt || !gtt->userptr) @@ -961,7 +963,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); enum dma_data_direction direction = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -998,7 +1000,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); enum dma_data_direction direction = write ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -1045,7 +1047,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); struct ttm_tt *ttm = tbo->ttm; - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (amdgpu_bo_encrypted(abo)) flags |= AMDGPU_PTE_TMZ; @@ -1083,7 +1085,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, struct ttm_resource *bo_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void*)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); uint64_t flags; int r; @@ -1153,7 +1155,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct ttm_operation_ctx ctx = { false, false }; - struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); struct ttm_placement placement; struct ttm_place placements; struct ttm_resource *tmp; @@ -1224,7 +1226,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); /* if the pages have userptr pinning then clear that first */ if (gtt->userptr) { @@ -1254,7 +1256,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt->usertask) put_task_struct(gtt->usertask); @@ -1308,7 +1310,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, struct ttm_operation_ctx *ctx) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); pgoff_t i; int ret; @@ -1342,7 +1344,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_device *bdev, static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); struct amdgpu_device *adev; pgoff_t i; @@ -1411,7 +1413,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, /* Set TTM_TT_FLAG_EXTERNAL before populate but after create. 
*/ bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL; - gtt = (void *)bo->ttm; + gtt = ttm_to_amdgpu_ttm_tt(bo->ttm); gtt->userptr = addr; gtt->userflags = flags; @@ -1435,7 +1437,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, */ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL) return NULL; @@ -1455,7 +1457,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end, unsigned long *userptr) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1478,7 +1480,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, */ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL || !gtt->userptr) return false; @@ -1562,7 +1564,7 @@ bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) */ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); if (gtt == NULL) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 6c0ea34cc75e0..84a815389885b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -652,8 +652,7 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = YRES_MAX; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 9c038b037a284..3072c18221ad0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2888,8 +2888,7 @@ static int dce_v10_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; #ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index f7553590141d7..3d124b6648213 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3006,8 +3006,7 @@ static int dce_v11_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; #ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 2f5383ae0b3b6..f794839e5ecc4 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2773,8 +2773,7 @@ static int dce_v6_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_width = 16384; adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + adev_to_drm(adev)->mode_config.prefer_shadow = 1; #ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index adf6f2507e39d..d787144c8bd35 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2784,8 +2784,11 @@ static int dce_v8_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow = 0; + if (adev->asic_type == CHIP_HAWAII) + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; + else + adev_to_drm(adev)->mode_config.prefer_shadow = 1; #ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 53bc585246169..f61d7c7c9d262 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5994,6 +5994,9 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); } + if (adev->job_hang && !enable) + return 0; + for (i = 0; i < adev->usec_timeout; i++) { if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) break; @@ -7592,8 +7595,10 @@ static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], PREEMPT_QUEUES, 0, 0); - - return amdgpu_ring_test_helper(kiq_ring); + if (!adev->job_hang) + return amdgpu_ring_test_helper(kiq_ring); + else + return 0; } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 6fd71cb10e54a..158d87e6805d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -53,6 +53,7 @@ #define GFX11_MEC_HPD_SIZE 2048 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1 0x1388 #define regCGTT_WD_CLK_CTRL 0x5086 #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 @@ -5279,6 +5280,38 @@ static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { .update_spm_vmid = gfx_v11_0_update_spm_vmid, }; +static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) +{ + u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL); + + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + + WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data); + + // Program RLC_PG_DELAY3 for CGPG hysteresis + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(11, 0, 1): + WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); + break; + default: + break; + } + } +} + +static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool 
enable) +{ + amdgpu_gfx_rlc_enter_safe_mode(adev); + + gfx_v11_cntl_power_gating(adev, enable); + + amdgpu_gfx_rlc_exit_safe_mode(adev); +} + static int gfx_v11_0_set_powergating_state(void *handle, enum amd_powergating_state state) { @@ -5293,6 +5326,11 @@ static int gfx_v11_0_set_powergating_state(void *handle, case IP_VERSION(11, 0, 2): amdgpu_gfx_off_ctrl(adev, enable); break; + case IP_VERSION(11, 0, 1): + gfx_v11_cntl_pg(adev, enable); + /* TODO: Enable this when GFXOFF is ready */ + // amdgpu_gfx_off_ctrl(adev, enable); + break; default: break; } @@ -5310,6 +5348,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index d8c5315811169..8cf53e039c115 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -576,6 +576,111 @@ static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev) } } +static void gfxhub_v2_1_save_regs(struct amdgpu_device *adev) +{ + int i; + adev->gmc.VM_L2_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL); + adev->gmc.VM_L2_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2); + adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL); + adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32); + adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32); + adev->gmc.VM_L2_PROTECTION_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); + adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2); + adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3); + adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4); + adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32); + adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32 = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32); + adev->gmc.VM_DEBUG = RREG32_SOC15(GC, 0, mmGCVM_DEBUG); + adev->gmc.VM_L2_MM_GROUP_RT_CLASSES = RREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES); + adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID); + adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2 = RREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2); + adev->gmc.VM_L2_CACHE_PARITY_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL); + adev->gmc.VM_L2_IH_LOG_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL); + + for (i = 0; i <= 15; i++) { + adev->gmc.VM_CONTEXT_CNTL[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i); + adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, 
i * 2); + adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i] = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2); + } + + adev->gmc.MC_VM_MX_L1_TLB_CNTL = RREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL); +} + +static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev) +{ + int i; + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, adev->gmc.VM_L2_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, adev->gmc.VM_L2_CNTL2); + WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL, adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_LO32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_LO32); + WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_ADDR_HI32, adev->gmc.VM_DUMMY_PAGE_FAULT_ADDR_HI32); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL2, adev->gmc.VM_L2_PROTECTION_FAULT_CNTL2); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL3, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL3); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_MM_CNTL4, adev->gmc.VM_L2_PROTECTION_FAULT_MM_CNTL4); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_LO32, adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_LO32); + WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_ADDR_HI32, adev->gmc.VM_L2_PROTECTION_FAULT_ADDR_HI32); + WREG32_SOC15(GC, 0, mmGCVM_DEBUG, adev->gmc.VM_DEBUG); + WREG32_SOC15(GC, 0, mmGCVM_L2_MM_GROUP_RT_CLASSES, adev->gmc.VM_L2_MM_GROUP_RT_CLASSES); + WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID); + WREG32_SOC15(GC, 0, mmGCVM_L2_BANK_SELECT_RESERVED_CID2, adev->gmc.VM_L2_BANK_SELECT_RESERVED_CID2); + WREG32_SOC15(GC, 0, mmGCVM_L2_CACHE_PARITY_CNTL, adev->gmc.VM_L2_CACHE_PARITY_CNTL); + WREG32_SOC15(GC, 0, mmGCVM_L2_IH_LOG_CNTL, adev->gmc.VM_L2_IH_LOG_CNTL); + + for (i = 0; i <= 15; i++) { + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_CNTL, i, adev->gmc.VM_CONTEXT_CNTL[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_LO32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_BASE_ADDR_HI32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_LO32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_START_ADDR_HI32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[i]); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, i * 2, adev->gmc.VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[i]); + } + + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE, adev->gmc.vram_start >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP, adev->gmc.vram_end >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, adev->gmc.MC_VM_MX_L1_TLB_CNTL); +} + +static void gfxhub_v2_1_halt(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + int i; + uint32_t tmp; + int time = 1000; + + gfxhub_v2_1_set_fault_enable_default(adev, false); + + for (i = 0; i <= 14; i++) { + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, ~0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, ~0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * 
hub->ctx_addr_distance, + 0); + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + 0); + } + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); + while ((tmp & (GRBM_STATUS2__EA_BUSY_MASK | + GRBM_STATUS2__EA_LINK_BUSY_MASK)) != 0 && + time) { + udelay(100); + time--; + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); + } + + if (!time) { + DRM_WARN("failed to wait for GRBM(EA) idle\n"); + } +} + const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .get_fb_location = gfxhub_v2_1_get_fb_location, .get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset, @@ -586,4 +691,7 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .init = gfxhub_v2_1_init, .get_xgmi_info = gfxhub_v2_1_get_xgmi_info, .utcl2_harvest = gfxhub_v2_1_utcl2_harvest, + .mode2_save_regs = gfxhub_v2_1_save_regs, + .mode2_restore_regs = gfxhub_v2_1_restore_regs, + .halt = gfxhub_v2_1_halt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4f4aaed3a0974..1fca5d7dbd246 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1619,12 +1619,15 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47); else amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 1): adev->num_vmhubs = 3; /* Keep the vm size same with Vega20 */ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 34b60e19b247a..4f66ee86086d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -291,6 +291,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 03296aa12f2a1..329f2679ac5f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -318,6 +318,7 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 288c414babdfa..fd14fa9b9cd7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -529,6 +529,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + clear_bit(AMDGPU_SKIP_MODE2_RESET, &reset_context.flags); amdgpu_device_gpu_recover(adev, NULL, &reset_context); } diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c new file mode 100644 index 0000000000000..7aa570c1ce4a9 --- /dev/null +++ 
b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -0,0 +1,303 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "sienna_cichlid.h" +#include "amdgpu_reset.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_dpm.h" +#include "amdgpu_job.h" +#include "amdgpu_ring.h" +#include "amdgpu_ras.h" +#include "amdgpu_psp.h" +#include "amdgpu_xgmi.h" + +static struct amdgpu_reset_handler * +sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_reset_handler *handler; + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + + if (reset_context->method != AMD_RESET_METHOD_NONE) { + list_for_each_entry(handler, &reset_ctl->reset_handlers, + handler_list) { + if (handler->reset_method == reset_context->method) + return handler; + } + } else { + list_for_each_entry(handler, &reset_ctl->reset_handlers, + handler_list) { + if (handler->reset_method == AMD_RESET_METHOD_MODE2 && + adev->pm.fw_version >= 0x3a5500 && + !amdgpu_sriov_vf(adev)) { + reset_context->method = AMD_RESET_METHOD_MODE2; + return handler; + } + } + } + + return NULL; +} + +static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev) +{ + int r, i; + + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { + if (!(adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_GFX || + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_SDMA)) + continue; + + r = adev->ip_blocks[i].version->funcs->suspend(adev); + + if (r) { + dev_err(adev->dev, + "suspend of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + adev->ip_blocks[i].status.hw = false; + } + + return r; +} + +static int +sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + int r = 0; + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + + if (!amdgpu_sriov_vf(adev)) { + if (adev->gfxhub.funcs->mode2_save_regs) + adev->gfxhub.funcs->mode2_save_regs(adev); + if (adev->gfxhub.funcs->halt) + adev->gfxhub.funcs->halt(adev); + r = sienna_cichlid_mode2_suspend_ip(adev); + } + + return r; +} + +static void sienna_cichlid_async_reset(struct work_struct *work) +{ + struct amdgpu_reset_handler *handler; + struct amdgpu_reset_control 
*reset_ctl = + container_of(work, struct amdgpu_reset_control, reset_work); + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + + list_for_each_entry(handler, &reset_ctl->reset_handlers, + handler_list) { + if (handler->reset_method == reset_ctl->active_reset) { + dev_dbg(adev->dev, "Resetting device\n"); + handler->do_reset(adev); + break; + } + } +} + +static int sienna_cichlid_mode2_reset(struct amdgpu_device *adev) +{ + /* disable BM */ + pci_clear_master(adev->pdev); + return amdgpu_dpm_mode2_reset(adev); +} + +static int +sienna_cichlid_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + int r; + + r = sienna_cichlid_mode2_reset(adev); + if (r) { + dev_err(adev->dev, + "ASIC reset failed with error, %d ", r); + } + return r; +} + +static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev) +{ + int i, r; + struct psp_context *psp = &adev->psp; + + r = psp_rlc_autoload_start(psp); + if (r) { + dev_err(adev->dev, "Failed to start rlc autoload\n"); + return r; + } + + /* Reinit GFXHUB */ + if (adev->gfxhub.funcs->mode2_restore_regs) + adev->gfxhub.funcs->mode2_restore_regs(adev); + adev->gfxhub.funcs->init(adev); + r = adev->gfxhub.funcs->gart_enable(adev); + if (r) { + dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n"); + return r; + } + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + dev_err(adev->dev, + "resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + + adev->ip_blocks[i].status.hw = true; + } + } + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!(adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_GFX || + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_SDMA)) + continue; + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + dev_err(adev->dev, + "resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + + adev->ip_blocks[i].status.hw = true; + } + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!(adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_GFX || + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_SDMA)) + continue; + + if (adev->ip_blocks[i].version->funcs->late_init) { + r = adev->ip_blocks[i].version->funcs->late_init( + (void *)adev); + if (r) { + dev_err(adev->dev, + "late_init of IP block <%s> failed %d after reset\n", + adev->ip_blocks[i].version->funcs->name, + r); + return r; + } + } + adev->ip_blocks[i].status.late_initialized = true; + } + + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); + + return r; +} + +static int +sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, + struct amdgpu_reset_context *reset_context) +{ + int r; + struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle; + + dev_info(tmp_adev->dev, + "GPU reset succeeded, trying to resume\n"); + r = sienna_cichlid_mode2_restore_ip(tmp_adev); + if (r) + goto end; + + /* + * Add this ASIC as tracked as reset was already + * complete successfully. 
+ */ + amdgpu_register_gpu_instance(tmp_adev); + + /* Resume RAS */ + amdgpu_ras_resume(tmp_adev); + + amdgpu_irq_gpu_reset_resume_helper(tmp_adev); + + r = amdgpu_ib_ring_tests(tmp_adev); + if (r) { + dev_err(tmp_adev->dev, + "ib ring test failed (%d).\n", r); + r = -EAGAIN; + goto end; + } + +end: + if (r) + return -EAGAIN; + else + return r; +} + +static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = { + .reset_method = AMD_RESET_METHOD_MODE2, + .prepare_env = NULL, + .prepare_hwcontext = sienna_cichlid_mode2_prepare_hwcontext, + .perform_reset = sienna_cichlid_mode2_perform_reset, + .restore_hwcontext = sienna_cichlid_mode2_restore_hwcontext, + .restore_env = NULL, + .do_reset = sienna_cichlid_mode2_reset, +}; + +int sienna_cichlid_reset_init(struct amdgpu_device *adev) +{ + struct amdgpu_reset_control *reset_ctl; + + reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL); + if (!reset_ctl) + return -ENOMEM; + + reset_ctl->handle = adev; + reset_ctl->async_reset = sienna_cichlid_async_reset; + reset_ctl->active_reset = AMD_RESET_METHOD_NONE; + reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler; + + INIT_LIST_HEAD(&reset_ctl->reset_handlers); + INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset); + /* Only mode2 is handled through reset control now */ + amdgpu_reset_add_handler(reset_ctl, &sienna_cichlid_mode2_handler); + + adev->reset_cntl = reset_ctl; + + return 0; +} + +int sienna_cichlid_reset_fini(struct amdgpu_device *adev) +{ + kfree(adev->reset_cntl); + adev->reset_cntl = NULL; + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h new file mode 100644 index 0000000000000..5213b162dacd3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.h @@ -0,0 +1,32 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __SIENNA_CICHLID_H__ +#define __SIENNA_CICHLID_H__ + +#include "amdgpu.h" + +int sienna_cichlid_reset_init(struct amdgpu_device *adev); +int sienna_cichlid_reset_fini(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 853dc0d1a01a4..77d549dbe2a8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -588,9 +588,16 @@ static int soc21_common_early_init(void *handle) break; case IP_VERSION(11, 0, 1): adev->cg_flags = + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = + AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x1; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a1ba9e73c9c67..d7fb67b63f4bf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -884,7 +884,7 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, err = kfd_wait_on_events(p, args->num_events, (void __user *)args->events_ptr, (args->wait_for_all != 0), - args->timeout, &args->wait_result); + &args->timeout, &args->wait_result); return err; } @@ -2496,13 +2496,12 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, struct task_struct *thread = NULL; int r = 0, i; struct kfd_process *target = NULL; + struct kfd_process_device *pdd = NULL; struct pid *pid = NULL; uint32_t *user_array = NULL; uint32_t debug_trap_action; uint64_t exception_mask; - uint32_t data1; - uint32_t data2; - uint32_t data3; + uint32_t data1, data2, data3, data4; bool check_devices; bool need_user_array; uint32_t size_to_copy_to_user_array = 0; @@ -2513,6 +2512,7 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, data1 = args->data1; data2 = args->data2; data3 = args->data3; + data4 = args->data4; exception_mask = args->exception_mask; if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { @@ -2607,8 +2607,8 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, debug_trap_action == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE || debug_trap_action == KFD_IOC_DBG_TRAP_NODE_SUSPEND || debug_trap_action == KFD_IOC_DBG_TRAP_NODE_RESUME || - debug_trap_action == KFD_IOC_DBG_TRAP_SET_ADDRESS_WATCH || - debug_trap_action == KFD_IOC_DBG_TRAP_CLEAR_ADDRESS_WATCH || + debug_trap_action == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH || + debug_trap_action == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH || debug_trap_action == KFD_IOC_DBG_TRAP_SET_PRECISE_MEM_OPS)) { r = -EPERM; goto unlock_out; @@ -2631,6 +2631,19 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, } } + if (debug_trap_action == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH || + debug_trap_action == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) { + uint32_t device_id = debug_trap_action == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ? 
+ data4 : data2; + int user_gpu_id = kfd_process_get_user_gpu_id(target, device_id); + + pdd = kfd_process_device_data_by_id(target, user_gpu_id); + if (user_gpu_id == -EINVAL || !pdd) { + r = -ENODEV; + goto unlock_out; + } + } + switch (debug_trap_action) { case KFD_IOC_DBG_TRAP_ENABLE: switch (data1) { @@ -2700,7 +2713,8 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, r = pqm_get_queue_snapshot(&target->pqm, exception_mask, /* Clear mask */ (void __user *)args->ptr, - args->data1); + args->data1, + &args->data2); args->data1 = r < 0 ? 0 : r; if (r > 0) @@ -2710,11 +2724,11 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, args->data1 = KFD_IOCTL_DBG_MAJOR_VERSION; args->data2 = KFD_IOCTL_DBG_MINOR_VERSION; break; - case KFD_IOC_DBG_TRAP_CLEAR_ADDRESS_WATCH: - r = kfd_dbg_trap_clear_address_watch(target, data1); + case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH: + r = kfd_dbg_trap_clear_dev_address_watch(pdd, data1); break; - case KFD_IOC_DBG_TRAP_SET_ADDRESS_WATCH: - r = kfd_dbg_trap_set_address_watch(target, + case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: + r = kfd_dbg_trap_set_dev_address_watch(pdd, args->ptr, /* watch address */ data3, /* watch address mask */ &data1, /* watch id */ @@ -2740,7 +2754,8 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, r = kfd_dbg_trap_device_snapshot(target, exception_mask, (void __user *) args->ptr, - &args->data1); + &args->data1, + &args->data2); break; case KFD_IOC_DBG_TRAP_RUNTIME_ENABLE: if (data1) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 6e8bcf42dc96d..5ab20f6dc2910 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -32,23 +32,12 @@ enum { }; /* - * A bitmask to indicate which watch points have been allocated. - * bit meaning: - * 0: unallocated/available - * 1: allocated/unavailable + * The spinlock protects the per device dev->alloc_watch_ids for multi-process access. + * The per-process per-device pdd->alloc_watch_ids is protected by the debug IOCTL + * process mutex. 
*/ -static uint32_t allocated_debug_watch_points = ~((1 << MAX_WATCH_ADDRESSES) - 1); static DEFINE_SPINLOCK(watch_points_lock); -/* Allocate and free watch point IDs for debugger */ -static int kfd_allocate_debug_watch_point(struct kfd_process *p, - uint64_t watch_address, - uint32_t watch_address_mask, - uint32_t *watch_point, - uint32_t watch_mode); -static int kfd_release_debug_watch_points(struct kfd_process *p, - uint32_t watch_point_bit_mask_to_free); - int kfd_dbg_ev_query_debug_event(struct kfd_process *process, unsigned int *queue_id, unsigned int *gpu_id, @@ -316,6 +305,121 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p, return 0; } +#define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1 +static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_id) { + int i; + + *watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID; + + spin_lock(&watch_points_lock); + + for (i = 0; i < MAX_WATCH_ADDRESSES; i++) { + /* device watchpoint in use so skip */ + if ((pdd->dev->alloc_watch_ids >> i) & 0x1) + continue; + + pdd->alloc_watch_ids |= 0x1 << i; + pdd->dev->alloc_watch_ids |= 0x1 << i; + *watch_id = i; + spin_unlock(&watch_points_lock); + return 0; + } + + spin_unlock(&watch_points_lock); + + return -ENOMEM; +} + +static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id) { + spin_lock(&watch_points_lock); + + /* process owns device watch point so safe to clear */ + if ((pdd->alloc_watch_ids >> watch_id) & 0x1) { + pdd->alloc_watch_ids &= ~(0x1 << watch_id); + pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id); + } + + spin_unlock(&watch_points_lock); +} + +static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id) +{ + bool owns_watch_id = false; + + spin_lock(&watch_points_lock); + owns_watch_id = watch_id < MAX_WATCH_ADDRESSES && ((pdd->alloc_watch_ids >> watch_id) & 0x1); + spin_unlock(&watch_points_lock); + + return owns_watch_id; +} + +int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd, uint32_t watch_id) +{ + int r; + + if (!kfd_dbg_owns_dev_watch_id(pdd, watch_id)) + return -EINVAL; + + r = debug_lock_and_unmap(pdd->dev->dqm); + if (r) + return r; + + amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, false); + pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch( + pdd->dev->adev, + watch_id); + amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, true); + + r = debug_map_and_unlock(pdd->dev->dqm); + + kfd_dbg_clear_dev_watch_id(pdd, watch_id); + + return r; +} + +int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd, + uint64_t watch_address, + uint32_t watch_address_mask, + uint32_t *watch_id, + uint32_t watch_mode) +{ + int r = kfd_dbg_get_dev_watch_id(pdd, watch_id); + + if (r) + return r; + + r = debug_lock_and_unmap(pdd->dev->dqm); + if (r) { + kfd_dbg_clear_dev_watch_id(pdd, *watch_id); + return r; + } + + amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, false); + pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch( + pdd->dev->adev, + watch_address, + watch_address_mask, + *watch_id, + watch_mode, + pdd->dev->vm_info.last_vmid_kfd); + amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, true); + + r = debug_map_and_unlock(pdd->dev->dqm); + /* HWS is broken so no point in HW rollback but release the watchpoint anyways. 
*/ + if (r) + kfd_dbg_clear_dev_watch_id(pdd, *watch_id); + + return r; +} + +static void kfd_dbg_clear_process_address_watch(struct kfd_process *target) { + int i, j; + + for (i = 0; i < target->n_pdds; i++) + for (j = 0; j < MAX_WATCH_ADDRESSES; j++) + kfd_dbg_trap_clear_dev_address_watch(target->pdds[i], j); +} + /* kfd_dbg_trap_deactivate: * target: target process * unwind: If this is unwinding a failed kfd_dbg_trap_enable() @@ -330,9 +434,7 @@ static void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int if (!unwind) { cancel_work_sync(&target->debug_event_workarea); - kfd_release_debug_watch_points(target, - target->allocated_debug_watch_point_bitmask); - target->allocated_debug_watch_point_bitmask = 0; + kfd_dbg_clear_process_address_watch(target); kfd_dbg_trap_set_wave_launch_mode(target, 0); kfd_dbg_trap_set_precise_mem_ops(target, 0); } @@ -599,42 +701,6 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target, return r; } -int kfd_dbg_trap_clear_address_watch(struct kfd_process *target, - uint32_t watch_id) -{ - /* check that we own watch id */ - if (!((1<allocated_debug_watch_point_bitmask)) { - pr_debug("Trying to free a watch point we don't own\n"); - return -EINVAL; - } - kfd_release_debug_watch_points(target, 1<allocated_debug_watch_point_bitmask ^= (1<allocated_debug_watch_point_bitmask |= (1 << *watch_id); - - return 0; -} - int kfd_dbg_trap_set_precise_mem_ops(struct kfd_process *target, uint32_t enable) { @@ -662,138 +728,6 @@ int kfd_dbg_trap_set_precise_mem_ops(struct kfd_process *target, return 0; } -#define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1 -static int kfd_dbg_get_watchpoint_id(void) -{ - int i, watch_point_to_allocate = KFD_DEBUGGER_INVALID_WATCH_POINT_ID; - - spin_lock(&watch_points_lock); - - for (i = 0; i < MAX_WATCH_ADDRESSES; i++) { - if (!(allocated_debug_watch_points & (1<n_pdds; i++) { - struct kfd_process_device *pdd = p->pdds[i]; - - r = debug_lock_and_unmap(pdd->dev->dqm); - if (r) - break; - - amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, false); - pdd->watch_points[*watch_point] = pdd->dev->kfd2kgd->set_address_watch( - pdd->dev->adev, - watch_address, - watch_address_mask, - *watch_point, - watch_mode, - pdd->dev->vm_info.last_vmid_kfd); - amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, true); - - r = debug_map_and_unlock(pdd->dev->dqm); - if (r) - break; - } - - return r; -} - -static int kfd_dbg_free_watchpoint_id(uint32_t watch_point_bit_mask_to_free) -{ - spin_lock(&watch_points_lock); - if (~allocated_debug_watch_points & watch_point_bit_mask_to_free) { - pr_err("Tried to free a free watch point! 
" - "allocated_debug_watch_points == 0x%08x " - "watch_point_bit_mask_to_free = 0x%08x\n", - allocated_debug_watch_points, - watch_point_bit_mask_to_free); - spin_unlock(&watch_points_lock); - return -EFAULT; - } - - pr_debug("Freeing watchpoint bitmask :0x%08x\n", - watch_point_bit_mask_to_free); - allocated_debug_watch_points ^= watch_point_bit_mask_to_free; - spin_unlock(&watch_points_lock); - - return 0; -} - - -static int kfd_release_debug_watch_points(struct kfd_process *p, - uint32_t watch_point_bit_mask_to_free) -{ - int i, j; - int r = kfd_dbg_free_watchpoint_id(watch_point_bit_mask_to_free); - - if (r) - return r; - - for (i = 0; i < p->n_pdds; i++) { - struct kfd_process_device *pdd = p->pdds[i]; - - r = debug_lock_and_unmap(pdd->dev->dqm); - if (r) - break; - - amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, false); - for (j = 0; j < MAX_WATCH_ADDRESSES; j++) { - if ((1<watch_points[j] = - pdd->dev->kfd2kgd->clear_address_watch( - pdd->dev->adev, - j); - } - amdgpu_amdkfd_gfx_off_ctrl(pdd->dev->adev, true); - - r = debug_map_and_unlock(pdd->dev->dqm); - if (r) - break; - } - return r; -} - int kfd_dbg_trap_query_exception_info(struct kfd_process *target, uint32_t source_id, uint32_t exception_code, @@ -916,23 +850,22 @@ int kfd_dbg_trap_query_exception_info(struct kfd_process *target, int kfd_dbg_trap_device_snapshot(struct kfd_process *target, uint64_t exception_clear_mask, void __user *user_info, - uint32_t *number_of_device_infos) + uint32_t *number_of_device_infos, + uint32_t *entry_size) { - int i; - struct kfd_dbg_device_info_entry device_info[MAX_GPU_INSTANCE]; + struct kfd_dbg_device_info_entry device_info = {0}; + uint32_t tmp_entry_size = *entry_size; + int i, r = 0; - if (!target) - return -EINVAL; - - if (!user_info || !number_of_device_infos) + if (!(target && user_info && number_of_device_infos && entry_size)) return -EINVAL; if (*number_of_device_infos < target->n_pdds) { - *number_of_device_infos = target->n_pdds; - return -ENOSPC; + r = -ENOSPC; + goto out; } - memset(device_info, 0, sizeof(device_info)); + *entry_size = min((size_t)entry_size, sizeof(device_info)); mutex_lock(&target->event_mutex); @@ -940,26 +873,32 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target, for (i = 0; i < target->n_pdds; i++) { struct kfd_process_device *pdd = target->pdds[i]; - device_info[i].gpu_id = pdd->dev->id; - device_info[i].exception_status = pdd->exception_status; - device_info[i].lds_base = pdd->lds_base; - device_info[i].lds_limit = pdd->lds_limit; - device_info[i].scratch_base = pdd->scratch_base; - device_info[i].scratch_limit = pdd->scratch_limit; - device_info[i].gpuvm_base = pdd->gpuvm_base; - device_info[i].gpuvm_limit = pdd->gpuvm_limit; + device_info.gpu_id = pdd->dev->id; + device_info.exception_status = pdd->exception_status; + device_info.lds_base = pdd->lds_base; + device_info.lds_limit = pdd->lds_limit; + device_info.scratch_base = pdd->scratch_base; + device_info.scratch_limit = pdd->scratch_limit; + device_info.gpuvm_base = pdd->gpuvm_base; + device_info.gpuvm_limit = pdd->gpuvm_limit; if (exception_clear_mask) pdd->exception_status &= ~exception_clear_mask; + + if (copy_to_user(user_info, &device_info, *entry_size)) { + r = -EFAULT; + break; + } + + user_info += tmp_entry_size; } + mutex_unlock(&target->event_mutex); - if (copy_to_user(user_info, device_info, - sizeof(device_info[0]) * target->n_pdds)) - return -EFAULT; +out: *number_of_device_infos = target->n_pdds; - return 0; + return r; } int kfd_dbg_runtime_enable(struct kfd_process *p, 
uint64_t r_debug, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h index 8a8de64845b46..7a982db3a4c89 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h @@ -58,9 +58,9 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target, uint32_t *trap_mask_supported); int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target, uint8_t wave_launch_mode); -int kfd_dbg_trap_clear_address_watch(struct kfd_process *target, +int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd, uint32_t watch_id); -int kfd_dbg_trap_set_address_watch(struct kfd_process *target, +int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd, uint64_t watch_address, uint32_t watch_address_mask, uint32_t *watch_id, @@ -93,7 +93,8 @@ int kfd_dbg_trap_query_exception_info(struct kfd_process *target, int kfd_dbg_trap_device_snapshot(struct kfd_process *target, uint64_t exception_clear_mask, void __user *user_info, - uint32_t *number_of_device_infos); + uint32_t *number_of_device_infos, + uint32_t *entry_size); void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target, uint64_t exception_set_mask); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 655b6fca8c10c..651d10daa36d4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -906,7 +906,8 @@ static long user_timeout_to_jiffies(uint32_t user_timeout_ms) return msecs_to_jiffies(user_timeout_ms) + 1; } -static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) +static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters, + bool undo_auto_reset) { uint32_t i; @@ -915,6 +916,9 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) spin_lock(&waiters[i].event->lock); remove_wait_queue(&waiters[i].event->wq, &waiters[i].wait); + if (undo_auto_reset && waiters[i].activated && + waiters[i].event && waiters[i].event->auto_reset) + set_event(waiters[i].event); spin_unlock(&waiters[i].event->lock); } @@ -923,7 +927,7 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) int kfd_wait_on_events(struct kfd_process *p, uint32_t num_events, void __user *data, - bool all, uint32_t user_timeout_ms, + bool all, uint32_t *user_timeout_ms, uint32_t *wait_result) { struct kfd_event_data __user *events = @@ -932,7 +936,7 @@ int kfd_wait_on_events(struct kfd_process *p, int ret = 0; struct kfd_event_waiter *event_waiters = NULL; - long timeout = user_timeout_to_jiffies(user_timeout_ms); + long timeout = user_timeout_to_jiffies(*user_timeout_ms); event_waiters = alloc_event_waiters(num_events); if (!event_waiters) { @@ -982,15 +986,11 @@ int kfd_wait_on_events(struct kfd_process *p, } if (signal_pending(current)) { - /* - * This is wrong when a nonzero, non-infinite timeout - * is specified. We need to use - * ERESTARTSYS_RESTARTBLOCK, but struct restart_block - * contains a union with data for each user and it's - * in generic kernel code that I don't want to - * touch yet. 
- */ ret = -ERESTARTSYS; + if (*user_timeout_ms != KFD_EVENT_TIMEOUT_IMMEDIATE && + *user_timeout_ms != KFD_EVENT_TIMEOUT_INFINITE) + *user_timeout_ms = jiffies_to_msecs( + max(0l, timeout-1)); break; } @@ -1031,7 +1031,7 @@ int kfd_wait_on_events(struct kfd_process *p, event_waiters, events); out_unlock: - free_waiters(num_events, event_waiters); + free_waiters(num_events, event_waiters, ret == -ERESTARTSYS); mutex_unlock(&p->event_mutex); out: if (ret) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 76841a60efe67..db2a8a070b695 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -367,6 +367,9 @@ struct kfd_dev { /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ struct dev_pagemap pgmap; + + /* Track per device allocated watch points. */ + uint32_t alloc_watch_ids; }; struct kfd_ipc_obj; @@ -882,6 +885,7 @@ struct kfd_process_device { uint32_t spi_dbg_override; uint32_t spi_dbg_launch_mode; uint32_t watch_points[4]; + uint32_t alloc_watch_ids; /* * If this process has been checkpointed before, then the user @@ -1006,9 +1010,6 @@ struct kfd_process { /* per-process-per device debug event fd file */ struct file *dbg_ev_file; - /* Allocated debug watch point IDs bitmask */ - uint32_t allocated_debug_watch_point_bitmask; - /* If the process is a kfd debugger, we need to know so we can clean * up at exit time. If a process enables debugging on itself, it does * its own clean-up, so we don't set the flag here. We track this by @@ -1435,8 +1436,9 @@ int pqm_get_wave_state(struct process_queue_manager *pqm, int pqm_get_queue_snapshot(struct process_queue_manager *pqm, uint64_t exception_clear_mask, - struct kfd_queue_snapshot_entry __user *buf, - int num_qss_entries); + void __user *buf, + int num_qss_entries, + uint32_t *entry_size); int amdkfd_fence_wait_timeout(uint64_t *fence_addr, uint64_t fence_value, @@ -1531,7 +1533,7 @@ void kfd_event_free_process(struct kfd_process *p); int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma); int kfd_wait_on_events(struct kfd_process *p, uint32_t num_events, void __user *data, - bool all, uint32_t user_timeout_ms, + bool all, uint32_t *user_timeout_ms, uint32_t *wait_result); void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, uint32_t valid_id_bits); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 79d609c0732f5..43697b3e4c9c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1028,11 +1028,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base) free_pages((unsigned long)pdd->qpd.cwsr_kaddr, get_order(KFD_CWSR_TBA_TMA_SIZE)); -#if defined(HAVE_BITMAP_FUNCS) bitmap_free(pdd->qpd.doorbell_bitmap); -#else - bitmap_free(pdd->qpd.doorbell_bitmap); -#endif idr_destroy(&pdd->alloc_idr); mutex_destroy(&pdd->qpd.doorbell_lock); @@ -1548,13 +1544,8 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, if (!KFD_IS_SOC15(dev)) return 0; -#if defined(HAVE_BITMAP_FUNCS) qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, GFP_KERNEL); -#else - qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, - GFP_KERNEL); -#endif if (!qpd->doorbell_bitmap) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index d7beb2fe72c24..35a1dfe7fe64b 100644 
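The kfd_events.c change above makes the wait timeout an in/out value: when a signal interrupts the wait, the remaining time is written back to the caller so a restarted ioctl waits only for what is left, and auto-reset events that were already consumed are re-armed in free_waiters(). A minimal standalone sketch of the write-back rule (hypothetical helper; plain HZ arithmetic stands in for jiffies_to_msecs(), and the immediate/infinite sentinels are assumed to be 0 and 0xFFFFFFFF):

#include <stdint.h>

#define TIMEOUT_IMMEDIATE 0u          /* assumed KFD_EVENT_TIMEOUT_IMMEDIATE */
#define TIMEOUT_INFINITE  0xFFFFFFFFu /* assumed KFD_EVENT_TIMEOUT_INFINITE */

/* Rewrite the caller's timeout to the time still remaining, clamped at
 * zero, unless an immediate poll or an infinite wait was requested. */
static void writeback_remaining_ms(uint32_t *user_timeout_ms,
                                   long remaining_jiffies, long hz)
{
        if (*user_timeout_ms == TIMEOUT_IMMEDIATE ||
            *user_timeout_ms == TIMEOUT_INFINITE)
                return;

        if (remaining_jiffies < 0)
                remaining_jiffies = 0;

        *user_timeout_ms = (uint32_t)(remaining_jiffies * 1000 / hz);
}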
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -151,13 +151,8 @@ void kfd_process_dequeue_from_all_devices(struct kfd_process *p) int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) { INIT_LIST_HEAD(&pqm->queues); -#if defined(HAVE_BITMAP_FUNCS) pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, GFP_KERNEL); -#else - pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, - GFP_KERNEL); -#endif if (!pqm->queue_slot_bitmap) return -ENOMEM; pqm->process = p; @@ -179,11 +174,7 @@ void pqm_uninit(struct process_queue_manager *pqm) kfree(pqn); } -#if defined(HAVE_BITMAP_FUNCS) bitmap_free(pqm->queue_slot_bitmap); -#else - bitmap_free(pqm->queue_slot_bitmap); -#endif pqm->queue_slot_bitmap = NULL; } @@ -583,12 +574,18 @@ int pqm_get_wave_state(struct process_queue_manager *pqm, int pqm_get_queue_snapshot(struct process_queue_manager *pqm, uint64_t exception_clear_mask, - struct kfd_queue_snapshot_entry __user *buf, - int num_qss_entries) + void __user *buf, + int num_qss_entries, + uint32_t *entry_size) { struct process_queue_node *pqn; - int r, qss_entry_count = 0; + uint32_t tmp_entry_size = *entry_size; + int qss_entry_count = 0; + + if (!(*entry_size)) + return -EINVAL; + *entry_size = min((size_t)entry_size, sizeof(struct kfd_queue_snapshot_entry)); mutex_lock(&pqm->process->event_mutex); list_for_each_entry(pqn, &pqm->queues, process_queue_list) { @@ -602,18 +599,19 @@ int pqm_get_queue_snapshot(struct process_queue_manager *pqm, set_queue_snapshot_entry(pqn->q->device->dqm, pqn->q, exception_clear_mask, &src); - r = copy_to_user(buf++, &src, sizeof(src)); - - if (r) { + if (copy_to_user(buf, &src, *entry_size)) { qss_entry_count = -EFAULT; - goto unlock; + break; } + + buf += tmp_entry_size; } qss_entry_count++; } -unlock: + mutex_unlock(&pqm->process->event_mutex); + return qss_entry_count; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 55f923aca648f..4b9d2a15fb085 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1287,7 +1287,6 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, dma_addr_t *dma_addr, struct amdgpu_device *bo_adev, struct dma_fence **fence, bool flush_tlb) { - uint64_t vram_base_offset = 0; struct amdgpu_device *adev = pdd->dev->adev; struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); uint64_t pte_flags; @@ -1301,9 +1300,6 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms, last_start, last_start + npages - 1, readonly); - if (prange->svm_bo && prange->ttm_res) - vram_base_offset = bo_adev->vm_manager.vram_base_offset; - for (i = offset; i < offset + npages; i++) { last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN; dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0b754d909fb98..5232aca9f3710 100755 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4017,8 +4017,11 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; - /* disable prefer shadow for now due to hibernation issues */ - adev_to_drm(adev)->mode_config.prefer_shadow 
= 0; + if (adev->asic_type == CHIP_HAWAII) + /* disable prefer shadow for now due to hibernation issues */ + adev_to_drm(adev)->mode_config.prefer_shadow = 0; + else + adev_to_drm(adev)->mode_config.prefer_shadow = 1; /* indicates support for immediate flip */ adev_to_drm(adev)->mode_config.async_page_flip = true; @@ -4380,6 +4383,7 @@ static void register_backlight_device(struct amdgpu_display_manager *dm, } } +static void amdgpu_set_panel_orientation(struct drm_connector *connector); /* * In this architecture, the association @@ -4573,6 +4577,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) adev_to_drm(adev)->vblank_disable_immediate = false; } } + amdgpu_set_panel_orientation(&aconnector->base); } /* Software is initialized. Now we can register interrupt handlers. */ @@ -7263,6 +7268,10 @@ static void amdgpu_set_panel_orientation(struct drm_connector *connector) connector->connector_type != DRM_MODE_CONNECTOR_LVDS) return; + mutex_lock(&connector->dev->mode_config.mutex); + amdgpu_dm_connector_get_modes(connector); + mutex_unlock(&connector->dev->mode_config.mutex); + encoder = amdgpu_dm_connector_to_encoder(connector); if (!encoder) return; @@ -7307,8 +7316,6 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector, * restored here. */ amdgpu_dm_update_freesync_caps(connector, edid); - - amdgpu_set_panel_orientation(connector); } else { amdgpu_dm_connector->num_modes = 0; } @@ -10226,6 +10233,7 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm /** * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM. + * * @dev: The DRM device * @state: The atomic state to commit * @@ -10945,8 +10953,18 @@ static int parse_hdmi_amd_vsdb(struct amdgpu_dm_connector *aconnector, return valid_vsdb_found ? i : -ENODEV; } +/** + * amdgpu_dm_update_freesync_caps - Update Freesync capabilities + * + * @aconnector: Connector to query. + * + * Amdgpu supports Freesync in DP and HDMI displays, and it is required to keep + * track of some of the display information in the internal data struct used by + * amdgpu_dm. This function checks which type of connector we need to set the + * FreeSync parameters. + */ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, - struct edid *edid) + struct edid *edid) { int i = 0; struct detailed_timing *timing; @@ -10959,8 +10977,8 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, struct drm_device *dev = connector->dev; struct amdgpu_device *adev = drm_to_adev(dev); - bool freesync_capable = false; struct amdgpu_hdmi_vsdb_info vsdb_info = {0}; + bool freesync_capable = false; if (!connector->state) { DRM_ERROR("%s - Connector has no state", __func__); @@ -10991,7 +11009,6 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (!adev->dm.freesync_module) goto update; - if (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT || sink->sink_signal == SIGNAL_TYPE_EDP) { bool edid_check_required = false; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index ec12ee7a1f1a8..74fa9e7cde9c1 100755 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -633,6 +633,10 @@ struct amdgpu_dm_connector { * The 'current' sink is in dc_link->sink. */ struct dc_sink *dc_sink; struct dc_link *dc_link; + + /** + * @dc_em_sink: Reference to the emulated (virtual) sink. 
+ */ struct dc_sink *dc_em_sink; /* DM only */ @@ -645,7 +649,16 @@ struct amdgpu_dm_connector { struct amdgpu_i2c_adapter *i2c; /* Monitor range limits */ - int min_vfreq ; + /** + * @min_vfreq: Minimal frequency supported by the display in Hz. This + * value is set to zero when there is no FreeSync support. + */ + int min_vfreq; + + /** + * @max_vfreq: Maximum frequency supported by the display in Hz. This + * value is set to zero when there is no FreeSync support. + */ int max_vfreq ; int pixel_clock_mhz; @@ -757,11 +770,34 @@ struct dm_connector_state { #endif }; +/** + * struct amdgpu_hdmi_vsdb_info - Keep track of the VSDB info + * + * AMDGPU supports FreeSync over HDMI by using the VSDB section, and this + * struct is useful to keep track of the display-specific information about + * FreeSync. + */ struct amdgpu_hdmi_vsdb_info { - unsigned int amd_vsdb_version; /* VSDB version, should be used to determine which VSIF to send */ - bool freesync_supported; /* FreeSync Supported */ - unsigned int min_refresh_rate_hz; /* FreeSync Minimum Refresh Rate in Hz */ - unsigned int max_refresh_rate_hz; /* FreeSync Maximum Refresh Rate in Hz */ + /** + * @amd_vsdb_version: Vendor Specific Data Block Version, should be + * used to determine which Vendor Specific InfoFrame (VSIF) to send. + */ + unsigned int amd_vsdb_version; + + /** + * @freesync_supported: FreeSync Supported. + */ + bool freesync_supported; + + /** + * @min_refresh_rate_hz: FreeSync Minimum Refresh Rate in Hz. + */ + unsigned int min_refresh_rate_hz; + + /** + * @max_refresh_rate_hz: FreeSync Maximum Refresh Rate in Hz + */ + unsigned int max_refresh_rate_hz; }; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index a71177305bcdf..a4cb23d059bd6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -29,7 +29,9 @@ #include "modules/color/color_gamma.h" #include "basics/conversion.h" -/* +/** + * DOC: overview + * * The DC interface to HW gives us the following color management blocks * per pipe (surface): * @@ -71,8 +73,8 @@ #define MAX_DRM_LUT_VALUE 0xFFFF -/* - * Initialize the color module. +/** + * amdgpu_dm_init_color_mod - Initialize the color module. * * We're not using the full color module, only certain components. * Only call setup functions for components that we need. @@ -82,7 +84,14 @@ void amdgpu_dm_init_color_mod(void) setup_x_points_distribution(); } -/* Extracts the DRM lut and lut size from a blob. */ +/** + * __extract_blob_lut - Extracts the DRM lut and lut size from a blob. + * @blob: DRM color mgmt property blob + * @size: lut size + * + * Returns: + * DRM LUT or NULL + */ static const struct drm_color_lut * __extract_blob_lut(const struct drm_property_blob *blob, uint32_t *size) { @@ -90,13 +99,18 @@ __extract_blob_lut(const struct drm_property_blob *blob, uint32_t *size) return blob ? (struct drm_color_lut *)blob->data : NULL; } -/* - * Return true if the given lut is a linear mapping of values, i.e. it acts - * like a bypass LUT. 
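As a concrete reading of the bypass-LUT check described in this comment, a standalone single-channel sketch (a hypothetical 256-entry table stands in for MAX_COLOR_LUT_ENTRIES, and an exact-match comparison is used where the driver may tolerate rounding):

#include <stdbool.h>
#include <stdint.h>

#define LUT_SIZE 256u /* stand-in for MAX_COLOR_LUT_ENTRIES */

/* True if every entry lies on the ramp f(i) = i * 0xFF00 / (LUT_SIZE - 1),
 * i.e. the table would behave as a bypass LUT. */
static bool lut_is_linear(const uint16_t lut[LUT_SIZE])
{
        uint32_t i;

        for (i = 0; i < LUT_SIZE; i++) {
                uint32_t expected = i * 0xFF00u / (LUT_SIZE - 1);

                if (lut[i] != expected)
                        return false;
        }
        return true;
}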
+/** + * __is_lut_linear - check if the given lut is a linear mapping of values + * @lut: given lut to check values + * @size: lut size * * It is considered linear if the lut represents: - * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in - * [0, MAX_COLOR_LUT_ENTRIES) + * f(a) = (0xFF00/MAX_COLOR_LUT_ENTRIES-1)a; for integer a in [0, + * MAX_COLOR_LUT_ENTRIES) + * + * Returns: + * True if the given lut is a linear mapping of values, i.e. it acts like a + * bypass LUT. Otherwise, false. */ static bool __is_lut_linear(const struct drm_color_lut *lut, uint32_t size) { @@ -119,9 +133,13 @@ static bool __is_lut_linear(const struct drm_color_lut *lut, uint32_t size) return true; } -/* - * Convert the drm_color_lut to dc_gamma. The conversion depends on the size - * of the lut - whether or not it's legacy. +/** + * __drm_lut_to_dc_gamma - convert the drm_color_lut to dc_gamma. + * @lut: DRM lookup table for color conversion + * @gamma: DC gamma to set entries + * @is_legacy: legacy or atomic gamma + * + * The conversion depends on the size of the lut - whether or not it's legacy. */ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut, struct dc_gamma *gamma, bool is_legacy) @@ -154,8 +172,11 @@ static void __drm_lut_to_dc_gamma(const struct drm_color_lut *lut, } } -/* - * Converts a DRM CTM to a DC CSC float matrix. +/** + * __drm_ctm_to_dc_matrix - converts a DRM CTM to a DC CSC float matrix + * @ctm: DRM color transformation matrix + * @matrix: DC CSC float matrix + * * The matrix needs to be a 3x4 (12 entry) matrix. */ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, @@ -189,7 +210,18 @@ static void __drm_ctm_to_dc_matrix(const struct drm_color_ctm *ctm, } } -/* Calculates the legacy transfer function - only for sRGB input space. */ +/** + * __set_legacy_tf - Calculates the legacy transfer function + * @func: transfer function + * @lut: lookup table that defines the color space + * @lut_size: size of respective lut + * @has_rom: if ROM can be used for hardcoded curve + * + * Only for sRGB input space + * + * Returns: + * 0 in case of success, -ENOMEM if fails + */ static int __set_legacy_tf(struct dc_transfer_func *func, const struct drm_color_lut *lut, uint32_t lut_size, bool has_rom) @@ -218,7 +250,16 @@ static int __set_legacy_tf(struct dc_transfer_func *func, return res ? 0 : -ENOMEM; } -/* Calculates the output transfer function based on expected input space. */ +/** + * __set_output_tf - calculates the output transfer function based on expected input space. + * @func: transfer function + * @lut: lookup table that defines the color space + * @lut_size: size of respective lut + * @has_rom: if ROM can be used for hardcoded curve + * + * Returns: + * 0 in case of success. -ENOMEM if fails. + */ static int __set_output_tf(struct dc_transfer_func *func, const struct drm_color_lut *lut, uint32_t lut_size, bool has_rom) @@ -262,7 +303,16 @@ static int __set_output_tf(struct dc_transfer_func *func, return res ? 0 : -ENOMEM; } -/* Caculates the input transfer function based on expected input space. */ +/** + * __set_input_tf - calculates the input transfer function based on expected + * input space. + * @func: transfer function + * @lut: lookup table that defines the color space + * @lut_size: size of respective lut. + * + * Returns: + * 0 in case of success. -ENOMEM if fails. 
+ */ static int __set_input_tf(struct dc_transfer_func *func, const struct drm_color_lut *lut, uint32_t lut_size) { @@ -285,13 +335,14 @@ static int __set_input_tf(struct dc_transfer_func *func, } /** - * amdgpu_dm_verify_lut_sizes + * amdgpu_dm_verify_lut_sizes - verifies if DRM luts match the hw supported sizes * @crtc_state: the DRM CRTC state * - * Verifies that the Degamma and Gamma LUTs attached to the |crtc_state| are of - * the expected size. + * Verifies that the Degamma and Gamma LUTs attached to the &crtc_state + * are of the expected size. * - * Returns 0 on success. + * Returns: + * 0 on success. -EINVAL if any lut sizes are invalid. */ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) { @@ -327,9 +378,9 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) * of the HW blocks as long as the CRTC CTM always comes before the * CRTC RGM and after the CRTC DGM. * - * The CRTC RGM block will be placed in the RGM LUT block if it is non-linear. - * The CRTC DGM block will be placed in the DGM LUT block if it is non-linear. - * The CRTC CTM will be placed in the gamut remap block if it is non-linear. + * - The CRTC RGM block will be placed in the RGM LUT block if it is non-linear. + * - The CRTC DGM block will be placed in the DGM LUT block if it is non-linear. + * - The CRTC CTM will be placed in the gamut remap block if it is non-linear. * * The RGM block is typically more fully featured and accurate across * all ASICs - DCE can't support a custom non-linear CRTC DGM. @@ -338,7 +389,8 @@ int amdgpu_dm_verify_lut_sizes(const struct drm_crtc_state *crtc_state) * management at once we have to either restrict the usage of CRTC properties * or blend adjustments together. * - * Returns 0 on success. + * Returns: + * 0 on success. Error code if setup fails. */ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) { @@ -393,7 +445,7 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) if (r) return r; } else if (has_regamma) { - /* CRTC RGM goes into RGM LUT. */ + /* If atomic regamma, CRTC RGM goes into RGM LUT. */ stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; stream->out_transfer_func->tf = TRANSFER_FUNCTION_LINEAR; @@ -450,9 +502,10 @@ int amdgpu_dm_update_crtc_color_mgmt(struct dm_crtc_state *crtc) * * Update the underlying dc_stream_state's input transfer function (ITF) in * preparation for hardware commit. The transfer function used depends on - * the prepartion done on the stream for color management. + * the preparation done on the stream for color management. * - * Returns 0 on success. + * Returns: + * 0 on success. -ENOMEM if mem allocation fails. */ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, struct dc_plane_state *dc_plane_state) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 62d115fd02396..24bd6b469a00e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -818,8 +818,14 @@ bool dm_helpers_dp_write_dsc_enable( const struct dc_stream_state *stream, bool enable) { - uint8_t enable_dsc = enable ? 1 : 0; + static const uint8_t DSC_DISABLE; + static const uint8_t DSC_DECODING = 0x01; + static const uint8_t DSC_PASSTHROUGH = 0x02; + struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_port *port; + uint8_t enable_dsc = enable ? DSC_DECODING : DSC_DISABLE; + uint8_t enable_passthrough = enable ? 
DSC_PASSTHROUGH : DSC_DISABLE; uint8_t ret = 0; if (!stream) @@ -839,8 +845,39 @@ bool dm_helpers_dp_write_dsc_enable( aconnector->dsc_aux, stream, enable_dsc); #endif - ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); - DC_LOG_DC("Send DSC %s to MST RX\n", enable_dsc ? "enable" : "disable"); + port = aconnector->port; + + if (enable) { + if (port->passthrough_aux) { + ret = drm_dp_dpcd_write(port->passthrough_aux, + DP_DSC_ENABLE, + &enable_passthrough, 1); + DC_LOG_DC("Sent DSC pass-through enable to virtual dpcd port, ret = %u\n", + ret); + } + + ret = drm_dp_dpcd_write(aconnector->dsc_aux, + DP_DSC_ENABLE, &enable_dsc, 1); + DC_LOG_DC("Sent DSC decoding enable to %s port, ret = %u\n", + (port->passthrough_aux) ? "remote RX" : + "virtual dpcd", + ret); + } else { + ret = drm_dp_dpcd_write(aconnector->dsc_aux, + DP_DSC_ENABLE, &enable_dsc, 1); + DC_LOG_DC("Sent DSC decoding disable to %s port, ret = %u\n", + (port->passthrough_aux) ? "remote RX" : + "virtual dpcd", + ret); + + if (port->passthrough_aux) { + ret = drm_dp_dpcd_write(port->passthrough_aux, + DP_DSC_ENABLE, + &enable_passthrough, 1); + DC_LOG_DC("Sent DSC pass-through disable to virtual dpcd port, ret = %u\n", + ret); + } + } } if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_EDP) { @@ -857,7 +894,7 @@ bool dm_helpers_dp_write_dsc_enable( #endif } - return (ret > 0); + return ret; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 47835cec76850..1b9ccce0a78e4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -36,6 +36,7 @@ #include "dm_helpers.h" #include "dc_link_ddc.h" +#include "dc_link_dp.h" #include "ddc_service_types.h" #include "dpcd_defs.h" @@ -1569,17 +1570,85 @@ bool pre_validate_dsc(struct drm_atomic_state *state, #endif +static unsigned int kbps_from_pbn(unsigned int pbn) +{ + unsigned int kbps = pbn; + + kbps *= (1000000 / PEAK_FACTOR_X1000); + kbps *= 8; + kbps *= 54; + kbps /= 64; + + return kbps; +} + +static bool is_dsc_common_config_possible(struct dc_stream_state *stream, + struct dc_dsc_bw_range *bw_range) +{ + struct dc_dsc_policy dsc_policy = {0}; + + dc_dsc_get_policy_for_timing(&stream->timing, 0, &dsc_policy); + dc_dsc_compute_bandwidth_range(stream->sink->ctx->dc->res_pool->dscs[0], + stream->sink->ctx->dc->debug.dsc_min_slice_height_override, + dsc_policy.min_target_bpp * 16, + dsc_policy.max_target_bpp * 16, + &stream->sink->dsc_caps.dsc_dec_caps, + &stream->timing, bw_range); + + return bw_range->max_target_bpp_x16 && bw_range->min_target_bpp_x16; +} + enum dc_status dm_dp_mst_is_port_support_mode( struct amdgpu_dm_connector *aconnector, struct dc_stream_state *stream) { + struct dc_link_settings cur_link_settings; + unsigned int end_to_end_bw_in_kbps = 0; + unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; + unsigned int max_compressed_bw_in_kbps = 0; + struct dc_dsc_bw_range bw_range = {0}; int bpp, pbn, branch_max_throughput_mps = 0; - /* check if mode could be supported within fUll_pbn */ - bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; - pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); - if (pbn > aconnector->port->full_pbn) - return DC_FAIL_BANDWIDTH_VALIDATE; + /* + * check if the mode could be supported if DSC pass-through is supported + * AND check if there enough 
bandwidth available to support the mode + * with DSC enabled. + */ + if (is_dsc_common_config_possible(stream, &bw_range) && + aconnector->port->passthrough_aux) { + mutex_lock(&aconnector->mst_mgr.lock); + + cur_link_settings = stream->link->verified_link_cap; + + upper_link_bw_in_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, + &cur_link_settings + ); + down_link_bw_in_kbps = kbps_from_pbn(aconnector->port->full_pbn); + + /* pick the bottleneck */ + end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, + down_link_bw_in_kbps); + + mutex_unlock(&aconnector->mst_mgr.lock); + + /* + * use the maximum dsc compression bandwidth as the required + * bandwidth for the mode + */ + max_compressed_bw_in_kbps = bw_range.min_kbps; + + if (end_to_end_bw_in_kbps < max_compressed_bw_in_kbps) { + DRM_DEBUG_DRIVER("Mode does not fit into DSC pass-through bandwidth validation\n"); + return DC_FAIL_BANDWIDTH_VALIDATE; + } + } else { + /* check if mode could be supported within full_pbn */ + bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; + pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); + + if (pbn > aconnector->port->full_pbn) + return DC_FAIL_BANDWIDTH_VALIDATE; + } /* check is mst dsc output bandwidth branch_overall_throughput_0_mps */ switch (stream->timing.pixel_encoding) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 440ea0d19f899..79ca880c181bb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -802,7 +802,7 @@ static int get_plane_modifiers(struct amdgpu_device *adev, unsigned int plane_ty add_gfx10_1_modifiers(adev, mods, &size, &capacity); break; case AMDGPU_FAMILY_GC_11_0_0: - case AMDGPU_FAMILY_GC_11_0_2: + case AMDGPU_FAMILY_GC_11_0_1: add_gfx11_modifiers(adev, mods, &size, &capacity); break; } @@ -1638,7 +1638,7 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane, } break; case AMDGPU_FAMILY_GC_11_0_0: - case AMDGPU_FAMILY_GC_11_0_2: + case AMDGPU_FAMILY_GC_11_0_1: switch (AMD_FMT_MOD_GET(TILE, modifier)) { case AMD_FMT_MOD_TILE_GFX11_256K_R_X: case AMD_FMT_MOD_TILE_GFX9_64K_R_X: @@ -1824,6 +1824,11 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, return res; #endif +#ifdef HAVE_DRM_MODE_CONFIG_FB_MODIFIERS_NOT_SUPPORTED + if (modifiers == NULL) + adev_to_drm(dm->adev)->mode_config.fb_modifiers_not_supported = true; +#endif + res = drm_universal_plane_init(adev_to_drm(dm->adev), plane, possible_crtcs, &dm_plane_funcs, formats, num_formats, modifiers, plane->type, NULL); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 4c76091fd1f21..f276abb63bcd7 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -337,7 +337,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p break; } - case AMDGPU_FAMILY_GC_11_0_2: { + case AMDGPU_FAMILY_GC_11_0_1: { struct clk_mgr_dcn314 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL); if (clk_mgr == NULL) { @@ -397,7 +397,7 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base) dcn32_clk_mgr_destroy(clk_mgr); break; - case AMDGPU_FAMILY_GC_11_0_2: + case AMDGPU_FAMILY_GC_11_0_1: dcn314_clk_mgr_destroy(clk_mgr); break; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 0202dc682682b..ca6dfd2d7561f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -24,10 +24,9 @@ */ #include "dccg.h" -#include "clk_mgr_internal.h" +#include "rn_clk_mgr.h" #include "dcn20/dcn20_clk_mgr.h" -#include "rn_clk_mgr.h" #include "dml/dcn20/dcn20_fpu.h" #include "dce100/dce_clk_mgr.h" diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h index 2e088c5171b28..f1319957e400a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.h @@ -28,6 +28,7 @@ #include "clk_mgr.h" #include "dm_pp_smu.h" +#include "clk_mgr_internal.h" extern struct wm_table ddr4_wm_table_gs; extern struct wm_table lpddr4_wm_table_gs; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index ee99974b3b62b..7af19823a29db 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -307,16 +307,6 @@ static void dcn314_enable_pme_wa(struct clk_mgr *clk_mgr_base) dcn314_smu_enable_pme_wa(clk_mgr); } -void dcn314_init_clocks(struct clk_mgr *clk_mgr) -{ - memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); - // Assumption is that boot state always supports pstate - clk_mgr->clks.p_state_change_support = true; - clk_mgr->clks.prev_p_state_change_support = true; - clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; - clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; -} - bool dcn314_are_clock_states_equal(struct dc_clocks *a, struct dc_clocks *b) { @@ -641,7 +631,7 @@ static struct clk_mgr_funcs dcn314_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, .update_clocks = dcn314_update_clocks, - .init_clocks = dcn314_init_clocks, + .init_clocks = dcn31_init_clocks, .enable_pme_wa = dcn314_enable_pme_wa, .are_clock_states_equal = dcn314_are_clock_states_equal, .notify_wm_ranges = dcn314_notify_wm_ranges diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h index c695a4498c50f..171f84340eb2f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.h @@ -42,7 +42,7 @@ struct clk_mgr_dcn314 { bool dcn314_are_clock_states_equal(struct dc_clocks *a, struct dc_clocks *b); -void dcn314_init_clocks(struct clk_mgr *clk_mgr); + void dcn314_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool safe_to_lower); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 382daa5c9ec15..cf99097887076 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -642,14 +642,17 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, /** * dc_stream_get_crc() - Get CRC values for the given stream. - * @dc: DC object + * + * @dc: DC object. * @stream: The DC stream state of the stream to get CRCs from. - * @r_cr: CRC value for the first of the 3 channels stored here. - * @g_y: CRC value for the second of the 3 channels stored here. 
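A hedged usage sketch for the CRC query being re-documented here, with signatures as in this file (setup, locking and error handling elided; CRC capture must have been enabled first):

/* Illustrative only: assumes dc_stream_configure_crc() succeeded earlier
 * on this stream. */
static void log_stream_crc(struct dc *dc, struct dc_stream_state *stream)
{
        uint32_t r_cr = 0, g_y = 0, b_cb = 0;

        if (dc_stream_get_crc(dc, stream, &r_cr, &g_y, &b_cb))
                DC_LOG_DC("CRC r/cr=%u g/y=%u b/cb=%u\n", r_cr, g_y, b_cb);
}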
- * @b_cb: CRC value for the third of the 3 channels stored here. + * @r_cr: CRC value for the red component. + * @g_y: CRC value for the green component. + * @b_cb: CRC value for the blue component. * * dc_stream_configure_crc needs to be called beforehand to enable CRCs. - * Return false if stream is not found, or if CRCs are not enabled. + * + * Return: + * false if stream is not found, or if CRCs are not enabled. */ bool dc_stream_get_crc(struct dc *dc, struct dc_stream_state *stream, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) @@ -3242,7 +3245,7 @@ static void commit_planes_for_stream(struct dc *dc, odm_pipe->ttu_regs.min_ttu_vblank = MAX_TTU; } - if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) { + if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) if (top_pipe_to_program && top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { if (should_use_dmub_lock(stream->link)) { @@ -3260,7 +3263,6 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable( top_pipe_to_program->stream_res.tg); } - } if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { if (dc->hwss.subvp_pipe_control_lock) @@ -3479,7 +3481,7 @@ static void commit_planes_for_stream(struct dc *dc, dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false); } - if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) { + if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { top_pipe_to_program->stream_res.tg->funcs->wait_for_state( top_pipe_to_program->stream_res.tg, @@ -3506,21 +3508,19 @@ static void commit_planes_for_stream(struct dc *dc, top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_disable( top_pipe_to_program->stream_res.tg); } - } - if (update_type != UPDATE_TYPE_FAST) { + if (update_type != UPDATE_TYPE_FAST) dc->hwss.post_unlock_program_front_end(dc, context); - /* Since phantom pipe programming is moved to post_unlock_program_front_end, - * move the SubVP lock to after the phantom pipes have been setup - */ - if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { - if (dc->hwss.subvp_pipe_control_lock) - dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use); - } else { - if (dc->hwss.subvp_pipe_control_lock) - dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use); - } + /* Since phantom pipe programming is moved to post_unlock_program_front_end, + * move the SubVP lock to after the phantom pipes have been setup + */ + if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) { + if (dc->hwss.subvp_pipe_control_lock) + dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, NULL, subvp_prev_use); + } else { + if (dc->hwss.subvp_pipe_control_lock) + dc->hwss.subvp_pipe_control_lock(dc, context, false, should_lock_all_pipes, top_pipe_to_program, subvp_prev_use); } // Fire manual trigger only when bottom plane is flipped @@ -4311,8 +4311,8 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc) /* ***************************************************************************** * Function: dc_is_dmub_outbox_supported - - * - * @brief + * + * @brief * Checks whether DMUB FW supports outbox notifications, if supported * DM should register outbox interrupt prior to actually 
enabling interrupts * via dc_enable_dmub_outbox @@ -4332,7 +4332,7 @@ bool dc_is_dmub_outbox_supported(struct dc *dc) !dc->debug.dpia_debug.bits.disable_dpia) return true; - if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2 && + if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1 && !dc->debug.dpia_debug.bits.disable_dpia) return true; @@ -4380,6 +4380,7 @@ void dc_enable_dmub_outbox(struct dc *dc) struct dc_context *dc_ctx = dc->ctx; dmub_enable_outbox_notification(dc_ctx->dmub_srv); + DC_LOG_DC("%s: dmub outbox notifications enabled\n", __func__); } /** diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 6911fea69a521..89f80a3bb9f2e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3379,7 +3379,7 @@ bool dc_link_setup_psr(struct dc_link *link, switch(link->ctx->asic_id.chip_family) { case FAMILY_YELLOW_CARP: case AMDGPU_FAMILY_GC_10_3_6: - case AMDGPU_FAMILY_GC_11_0_2: + case AMDGPU_FAMILY_GC_11_0_1: if(!dc->debug.disable_z10) psr_context->psr_level.bits.SKIP_CRTC_DISABLE = false; break; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index e2d3b0beaa39a..59b07f743d99d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -169,7 +169,7 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id) if (ASICREV_IS_GC_11_0_2(asic_id.hw_internal_rev)) dc_version = DCN_VERSION_3_21; break; - case AMDGPU_FAMILY_GC_11_0_2: + case AMDGPU_FAMILY_GC_11_0_1: dc_version = DCN_VERSION_3_14; break; default: diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index adef5583963d4..6621f608b5a98 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.196" +#define DC_VER "3.2.197" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -118,7 +118,26 @@ struct dc_plane_cap { uint32_t min_height; }; -// Color management caps (DPP and MPC) +/** + * DOC: color-management-caps + * + * **Color management caps (DPP and MPC)** + * + * Modules/color calculates various color operations which are translated to + * abstracted HW. DCE 5-12 had almost no important changes, but starting with + * DCN1, every new generation comes with fairly major differences in color + * pipeline. Therefore, we abstract color pipe capabilities so modules/DM can + * decide mapping to HW block based on logical capabilities. + */ + +/** + * struct rom_curve_caps - predefined transfer function caps for degamma and regamma + * @srgb: RGB color space transfer func + * @bt2020: BT.2020 transfer func + * @gamma2_2: standard gamma + * @pq: perceptual quantizer transfer function + * @hlg: hybrid log–gamma transfer function + */ struct rom_curve_caps { uint16_t srgb : 1; uint16_t bt2020 : 1; @@ -127,36 +146,68 @@ struct rom_curve_caps { uint16_t hlg : 1; }; +/** + * struct dpp_color_caps - color pipeline capabilities for display pipe and + * plane blocks + * + * @dcn_arch: all DCE generations treated the same + * @input_lut_shared: shared with DGAM. 
Input LUT is different than most LUTs, + * just plain 256-entry lookup + * @icsc: input color space conversion + * @dgam_ram: programmable degamma LUT + * @post_csc: post color space conversion, before gamut remap + * @gamma_corr: degamma correction + * @hw_3d_lut: 3D LUT support. It implies a shaper LUT before. It may be shared + * with MPC by setting mpc:shared_3d_lut flag + * @ogam_ram: programmable out/blend gamma LUT + * @ocsc: output color space conversion + * @dgam_rom_for_yuv: pre-defined degamma LUT for YUV planes + * @dgam_rom_caps: pre-defined curve caps for degamma 1D LUT + * @ogam_rom_caps: pre-defined curve caps for regamma 1D LUT + * + * Note: hdr_mult and gamut remap (CTM) are always available in DPP (in that order) + */ struct dpp_color_caps { - uint16_t dcn_arch : 1; // all DCE generations treated the same - // input lut is different than most LUTs, just plain 256-entry lookup - uint16_t input_lut_shared : 1; // shared with DGAM + uint16_t dcn_arch : 1; + uint16_t input_lut_shared : 1; uint16_t icsc : 1; uint16_t dgam_ram : 1; - uint16_t post_csc : 1; // before gamut remap + uint16_t post_csc : 1; uint16_t gamma_corr : 1; - - // hdr_mult and gamut remap always available in DPP (in that order) - // 3d lut implies shaper LUT, - // it may be shared with MPC - check MPC:shared_3d_lut flag uint16_t hw_3d_lut : 1; - uint16_t ogam_ram : 1; // blnd gam + uint16_t ogam_ram : 1; uint16_t ocsc : 1; uint16_t dgam_rom_for_yuv : 1; struct rom_curve_caps dgam_rom_caps; struct rom_curve_caps ogam_rom_caps; }; +/** + * struct mpc_color_caps - color pipeline capabilities for multiple pipe and + * plane combined blocks + * + * @gamut_remap: color transformation matrix + * @ogam_ram: programmable out gamma LUT + * @ocsc: output color space conversion matrix + * @num_3dluts: MPC 3D LUT; always assumes a preceding shaper LUT + * @shared_3d_lut: shared 3D LUT flag. Can be either DPP or MPC, but single + * instance + * @ogam_rom_caps: pre-defined curve caps for regamma 1D LUT + */ struct mpc_color_caps { uint16_t gamut_remap : 1; uint16_t ogam_ram : 1; uint16_t ocsc : 1; - uint16_t num_3dluts : 3; //3d lut always assumes a preceding shaper LUT - uint16_t shared_3d_lut:1; //can be in either DPP or MPC, but single instance - + uint16_t num_3dluts : 3; + uint16_t shared_3d_lut:1; struct rom_curve_caps ogam_rom_caps; }; +/** + * struct dc_color_caps - color pipes capabilities for DPP and MPC hw blocks + * @dpp: color pipes caps for DPP + * @mpc: color pipes caps for MPC + */ struct dc_color_caps { struct dpp_color_caps dpp; struct mpc_color_caps mpc; @@ -213,6 +264,7 @@ struct dc_caps { uint32_t cache_num_ways; uint16_t subvp_fw_processing_delay_us; uint16_t subvp_prefetch_end_to_mall_start_us; + uint8_t subvp_swath_height_margin_lines; // subvp start line must be aligned to 2 x swath height uint16_t subvp_pstate_allow_width_us; uint16_t subvp_vertical_int_margin_us; bool seamless_odm; @@ -384,9 +436,31 @@ enum dcc_option { DCC_HALF_REQ_DISALBE = 2, }; +/** + * enum pipe_split_policy - Pipe split strategy supported by DCN + * + * This enum is used to define the pipe split policy supported by DCN. By + * default, DC favors MPC_SPLIT_DYNAMIC. + */ enum pipe_split_policy { + /** + * @MPC_SPLIT_DYNAMIC: DC will automatically decide how to split the + * pipe in order to bring the best trade-off between performance and + * power consumption. This is the recommended option.
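+ *
+ * As a usage sketch, a policy override goes through the debug options
+ * declared later in this header, e.g.:
+ *
+ *   dc->debug.pipe_split_policy = MPC_SPLIT_AVOID;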
+ */ MPC_SPLIT_DYNAMIC = 0, + + /** + * @MPC_SPLIT_AVOID: Avoid pipe split, which means that DC will not + * try any sort of split optimization. + */ MPC_SPLIT_AVOID = 1, + + /** + * @MPC_SPLIT_AVOID_MULT_DISP: With this option, DC will only try to optimize + * the pipe utilization when using a single display; if the user + * connects to a second display, DC will avoid pipe split. + */ MPC_SPLIT_AVOID_MULT_DISP = 2, }; @@ -609,6 +683,7 @@ struct dc_bounding_box_overrides { int percent_of_ideal_drambw; int dram_clock_change_latency_ns; int dummy_clock_change_latency_ns; + int fclk_clock_change_latency_ns; /* This forces a hard min on the DCFCLK we use * for DML. Unlike the debug option for forcing * DCFCLK, this override affects watermark calculations @@ -620,6 +695,14 @@ struct dc_state; struct resource_pool; struct dce_hwseq; +/** + * struct dc_debug_options - DC debug struct + * + * This struct provides a simple mechanism for developers to change some + * configurations, enable/disable features, and activate extra debug options. + * This can be very handy to narrow down whether some specific feature is + * causing an issue or not. + */ struct dc_debug_options { bool native422_support; bool disable_dsc; @@ -639,6 +722,11 @@ struct dc_debug_options { bool disable_stutter; bool use_max_lb; enum dcc_option disable_dcc; + + /** + * @pipe_split_policy: Define which pipe split policy is used by the + * display core. + */ enum pipe_split_policy pipe_split_policy; bool force_single_disp_pipe_split; bool voltage_align_fclk; @@ -753,6 +841,7 @@ struct dc_debug_options { uint32_t mst_start_top_delay; uint8_t psr_power_use_phy_fsm; enum dml_hostvm_override_opts dml_hostvm_override; + bool dml_disallow_alternate_prefetch_modes; bool use_legacy_soc_bb_mechanism; bool exit_idle_opt_for_cursor_updates; bool enable_single_display_2to1_odm_policy; @@ -812,6 +901,17 @@ struct dc { uint32_t *dcn_reg_offsets; uint32_t *nbio_reg_offsets; + + /* Scratch memory */ + struct { + struct { + /* + * For matching clock_limits table in driver with table + * from PMFW.
+ */ + struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; + } update_bw_bounding_box; + } scratch; }; enum frame_buffer_mode { diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 2d61c2a91cee2..c8059c28ac494 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -275,8 +275,7 @@ void dc_dmub_srv_set_drr_manual_trigger_cmd(struct dc *dc, uint32_t tg_inst) union dmub_rb_cmd cmd = { 0 }; cmd.drr_update.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; - // TODO: Uncomment once FW headers are promoted - //cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER; + cmd.drr_update.header.sub_type = DMUB_CMD__FAMS_SET_MANUAL_TRIGGER; cmd.drr_update.dmub_optc_state_req.tg_inst = tg_inst; cmd.drr_update.header.payload_bytes = sizeof(cmd.drr_update) - sizeof(cmd.drr_update.header); @@ -612,6 +611,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, main_timing->v_total - main_timing->v_front_porch - main_timing->v_addressable; pipe_data->pipe_config.subvp_data.mall_region_lines = phantom_timing->v_addressable; pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->pipe_idx; + pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param; // Prefetch lines is equal to VACTIVE + BP + VSYNC pipe_data->pipe_config.subvp_data.prefetch_lines = diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 982d96e97a3dc..848db8676adfd 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -417,19 +417,43 @@ enum dc_scan_direction { SCAN_DIRECTION_VERTICAL = 2, /* 90, 270 rotation */ }; +/** + * struct dc_cursor_position: Hardware cursor data. + * + * This struct keeps the action information related to the cursor that will be + * sent and received from our DC core. + */ struct dc_cursor_position { + /** + * @x: It represents the top left abscissa coordinate of the cursor. + */ uint32_t x; + + /** + * @y: It is the top ordinate of the cursor coordinate. + */ uint32_t y; + /** + * @x_hotspot: Define the abscissa point where mouse click happens. + */ uint32_t x_hotspot; + + /** + * @y_hotspot: Define the ordinate point where mouse click happens. + */ uint32_t y_hotspot; - /* - * This parameter indicates whether HW cursor should be enabled + /** + * @enable: This parameter indicates whether hardware cursor should be + * enabled. */ bool enable; - /* Translate cursor x/y by the source rectangle for each plane. */ + /** + * @translate_by_source: Translate cursor x/y by the source rectangle + * for each plane. + */ bool translate_by_source; }; @@ -494,7 +518,9 @@ struct dc_gamma { /* Used by both ipp amd opp functions*/ /* TODO: to be consolidated with enum color_space */ -/* +/** + * enum dc_cursor_color_format - DC cursor programming mode + * * This enum is for programming CURSOR_MODE register field. What this register * should be programmed to depends on OS requested cursor shape flags and what * we stored in the cursor surface. @@ -530,17 +556,39 @@ union dc_cursor_attribute_flags { }; struct dc_cursor_attributes { + /** + * @address: This field represents the framebuffer address associated + * with the cursor. It is important to highlight that this address is + * divided into a high and low parts. + */ PHYSICAL_ADDRESS_LOC address; + + /** + * @pitch: Cursor line stride. 
+ */ uint32_t pitch; - /* Width and height should correspond to cursor surface width x heigh */ + /** + * @width: Width should correspond to cursor surface width. + */ uint32_t width; + /** + * @height: Height should correspond to cursor surface height. + */ uint32_t height; + /** + * @color_format: DC cursor programming mode. + */ enum dc_cursor_color_format color_format; - uint32_t sdr_white_level; // for boosting (SDR) cursor in HDR mode + /** + * @sdr_white_level: Boosting (SDR) cursor in HDR mode. + */ + uint32_t sdr_white_level; - /* In case we support HW Cursor rotation in the future */ + /** + * @rotation_angle: In case we support HW Cursor rotation in the future. + */ enum dc_rotation_angle rotation_angle; union dc_cursor_attribute_flags attribute_flags; @@ -765,22 +813,107 @@ struct dc_dsc_config { uint32_t mst_pbn; /* pbn of display on dsc mst hub */ }; +/** + * struct dc_crtc_timing - Timing parameters used to configure DCN blocks + * + * DCN provides multiple signals and parameters that can be used to adjust + * timing parameters, this struct aggregates several of these values for easy + * access. In this struct, fields prefixed with h_* are related to horizontal + * timing, and v_* to vertical timing. Keep in mind that when we talk about + * vertical timings, the values, in general, are described in the number of + * lines; on the other hand, the horizontal values are in pixels. + */ struct dc_crtc_timing { + /** + * @h_total: The total number of pixels from the rising edge of the + * previous HSync until the rising edge of the current HSync. + */ uint32_t h_total; + + /** + * @h_border_left: The black pixels related to the left border. + */ uint32_t h_border_left; + + /** + * @h_addressable: It is the range of pixels displayed horizontally. + * For example, if the display resolution is 3840x2160, the horizontal + * addressable area is 3840. + */ uint32_t h_addressable; + + /** + * @h_border_right: The black pixels related to the right border. + */ uint32_t h_border_right; + + /** + * @h_front_porch: Period (in pixels) between HBlank start and the + * rising edge of HSync. + */ uint32_t h_front_porch; + + /** + * @h_sync_width: HSync duration in pixels. + */ uint32_t h_sync_width; + /** + * @v_total: It is the total number of lines from the rising edge of + * the previous VSync until the rising edge of the current VSync. + * + * |--------------------------| + * +-+ V_TOTAL +-+ + * | | | | + * VSync ---+ +--------- // -----------+ +--- + */ uint32_t v_total; + + /** + * @v_border_top: The black border on the top. + */ uint32_t v_border_top; + + /** + * @v_addressable: It is the range of the scanout at which the + * framebuffer is displayed. For example, if the display resolution is + * 3840x2160, the addressable area is 2160 lines, or if the resolution + * is 1920x1080, the addressable area is 1080 lines. + */ uint32_t v_addressable; + + /** + * @v_border_bottom: The black border on the bottom. + */ uint32_t v_border_bottom; + + /** + * @v_front_porch: Period (in lines) between VBlank start and rising + * edge of VSync. + * +-+ + * VSync | | + * ----------+ +--------... + * +------------------... + * VBlank | + * --+ + * |-------| + * v_front_porch + */ uint32_t v_front_porch; + + /** + * @v_sync_width: VSync signal width in lines. + */ uint32_t v_sync_width; + /** + * @pix_clk_100hz: Pipe pixel precision + * + * This field is used to communicate pixel clocks with 100 Hz accuracy + * from dc_crtc_timing to BIOS command table.
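+ * + * For example, a 148.5 MHz pixel clock is stored here as 1485000. As an + * illustration (not part of the BIOS interface), the nominal refresh rate + * can be derived from the other fields of this struct as + * pix_clk_100hz * 100 / (h_total * v_total).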
+ */ uint32_t pix_clk_100hz; + uint32_t min_refresh_in_uhz; uint32_t vic; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index a0af0f6afeef8..9544abf75e846 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -344,6 +344,7 @@ enum dc_detect_reason { DETECT_REASON_HPDRX, DETECT_REASON_FALLBACK, DETECT_REASON_RETRAIN, + DETECT_REASON_TDR, }; bool dc_link_detect(struct dc_link *dc_link, enum dc_detect_reason reason); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index df0c36c630216..f6fd2073951d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -112,6 +112,7 @@ void dcn10_lock_all_pipes(struct dc *dc, */ if (pipe_ctx->top_pipe || !pipe_ctx->stream || + !pipe_ctx->plane_state || !tg->funcs->is_tg_enabled(tg)) continue; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 769974375b4b3..8e9384094f6d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -131,6 +131,12 @@ struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) while (tmp_mpcc != NULL) { if (tmp_mpcc->dpp_id == dpp_id) return tmp_mpcc; + + /* avoid circular linked list */ + ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot); + if (tmp_mpcc == tmp_mpcc->mpcc_bot) + break; + tmp_mpcc = tmp_mpcc->mpcc_bot; } return NULL; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 968b089d86e92..0877ab143b98b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -312,6 +312,20 @@ void optc1_program_timing( } } +/** + * optc1_set_vtg_params - Set Vertical Timing Generator (VTG) parameters + * + * @optc: timing_generator struct used to extract the optc parameters + * @dc_crtc_timing: Timing parameters to be configured + * @program_fp2: Boolean value indicating if FP2 will be programmed or not + * + * OTG is responsible for generating the global sync signals, including + * vertical timing information for each HUBP in the dcfclk domain. Each VTG is + * associated with one OTG that provides HUBP with vertical timing information + * (i.e., there is a 1:1 correspondence between OTG and VTG). This function is + * responsible for setting the OTG parameters to the VTG during the pipe + * programming. + */ void optc1_set_vtg_params(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing, bool program_fp2) { @@ -465,6 +479,11 @@ void optc1_enable_optc_clock(struct timing_generator *optc, bool enable) OTG_CLOCK_ON, 1, 1, 1000); } else { + + // Last chance to clear underflow; otherwise it will always be there because the clock is off. + if (optc->funcs->is_optc_underflow_occurred(optc) == true) + optc->funcs->clear_optc_underflow(optc); + REG_UPDATE_2(OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, 0, OTG_CLOCK_EN, 0); @@ -1493,8 +1512,23 @@ bool optc1_configure_crc(struct timing_generator *optc, return true; } +/** + * optc1_get_crc - Capture CRC result per component + * + * @optc: timing_generator instance. + * @r_cr: 16-bit primary CRC signature for red data. + * @g_y: 16-bit primary CRC signature for green data. + * @b_cb: 16-bit primary CRC signature for blue data.
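+ * + * The parameter names suggest that each signature serves both color models: + * @r_cr holds R or Cr, @g_y holds G or Y, and @b_cb holds B or Cb, + * depending on the stream's pixel encoding.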
+ * + * This function reads the CRC signature from the OPTC registers. Notice that + * we have three registers to keep the CRC result per color component (RGB). + * + * Returns: + * If CRC is disabled, return false; otherwise, return true and store the + * CRC results in the parameters. + */ bool optc1_get_crc(struct timing_generator *optc, - uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) + uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb) { uint32_t field = 0; struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -1505,12 +1539,14 @@ bool optc1_get_crc(struct timing_generator *optc, if (!field) return false; + /* OTG_CRC0_DATA_RG has the CRC16 results for the red and green components */ REG_GET_2(OTG_CRC0_DATA_RG, - CRC0_R_CR, r_cr, - CRC0_G_Y, g_y); + CRC0_R_CR, r_cr, + CRC0_G_Y, g_y); + /* OTG_CRC0_DATA_B has the CRC16 results for the blue component */ REG_GET(OTG_CRC0_DATA_B, - CRC0_B_CB, b_cb); + CRC0_B_CB, b_cb); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index 3d307dd58e9af..116f67a0b989d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -531,6 +531,12 @@ static struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) while (tmp_mpcc != NULL) { if (tmp_mpcc->dpp_id == 0xf || tmp_mpcc->dpp_id == dpp_id) return tmp_mpcc; + + /* avoid circular linked list */ + ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot); + if (tmp_mpcc == tmp_mpcc->mpcc_bot) + break; + tmp_mpcc = tmp_mpcc->mpcc_bot; } return NULL; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index 6a4dcafb9bba5..dc3e8df706b34 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -86,7 +86,7 @@ bool hubp3_program_surface_flip_and_addr( VMID, address->vmid); if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) { - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1); + REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0); REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1); } else { diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index f06b24dac4fc1..372b79cf35a15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -374,7 +374,7 @@ static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id int afmt_inst; /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGE) { + if (eng_id <= ENGINE_ID_DIGB) { vpg_inst = eng_id; afmt_inst = eng_id; } else diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 704a610d5ffe8..dfbdc7fd3adf9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -2165,7 +2165,7 @@ static bool dcn31_resource_construct( pool->base.usb4_dpia_count = 4; } - if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_2) + if (dc->ctx->asic_id.chip_family == AMDGPU_FAMILY_GC_11_0_1) pool->base.usb4_dpia_count = 4; /* Audio, Stream Encoders including HPO and virtual, MPC 3D LUTs */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 41f8ec99da6b3..901436591ed45
100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -32,7 +32,6 @@ container_of(pool, struct dcn31_resource_pool, base) extern struct _vcs_dpi_ip_params_st dcn3_1_ip; -extern struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc; struct dcn31_resource_pool { struct resource_pool base; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile index e3b5a95e03b19..702c28c2560eb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn314/Makefile @@ -13,31 +13,6 @@ DCN314 = dcn314_resource.o dcn314_hwseq.o dcn314_init.o \ dcn314_dio_stream_encoder.o dcn314_dccg.o dcn314_optc.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). -CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn314/dcn314_resource.o += -msse2 -endif -endif - AMD_DAL_DCN314 = $(addprefix $(AMDDALPATH)/dc/dcn314/,$(DCN314)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN314) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 29085be7a2fd3..438b984750ee8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -347,7 +347,10 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig { struct dc_stream_state *stream = pipe_ctx->stream; unsigned int odm_combine_factor = 0; + struct dc *dc = pipe_ctx->stream->ctx->dc; + bool two_pix_per_container = false; + two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); if (is_dp_128b_132b_signal(pipe_ctx)) { @@ -359,16 +362,13 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig else *k2_div = PIXEL_RATE_DIV_BY_4; } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { + if (two_pix_per_container) { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_2; - } else if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { - *k1_div = PIXEL_RATE_DIV_BY_2; - *k2_div = PIXEL_RATE_DIV_BY_2; } else { - if (odm_combine_factor == 1) - *k2_div = PIXEL_RATE_DIV_BY_4; - else if (odm_combine_factor == 2) + *k1_div = PIXEL_RATE_DIV_BY_1; + *k2_div = PIXEL_RATE_DIV_BY_4; + if ((odm_combine_factor == 2) || dc->debug.enable_dp_dig_pixel_rate_div_policy) *k2_div = PIXEL_RATE_DIV_BY_2; } } @@ -378,3 +378,31 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig return odm_combine_factor; } + +void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx) +{ + uint32_t pix_per_cycle = 1; + uint32_t odm_combine_factor = 1; + + if (!pipe_ctx || !pipe_ctx->stream || !pipe_ctx->stream_res.stream_enc) + return; + + odm_combine_factor = get_odm_config(pipe_ctx, NULL); + if (optc2_is_two_pixels_per_containter(&pipe_ctx->stream->timing) || odm_combine_factor > 1 + || 
dcn314_is_dp_dig_pixel_rate_div_policy(pipe_ctx)) + pix_per_cycle = 2; + + if (pipe_ctx->stream_res.stream_enc->funcs->set_input_mode) + pipe_ctx->stream_res.stream_enc->funcs->set_input_mode(pipe_ctx->stream_res.stream_enc, + pix_per_cycle); +} + +bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx) +{ + struct dc *dc = pipe_ctx->stream->ctx->dc; + + if (dc_is_dp_signal(pipe_ctx->stream->signal) && !is_dp_128b_132b_signal(pipe_ctx) && + dc->debug.enable_dp_dig_pixel_rate_div_policy) + return true; + return false; +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h index be0f5e4d48e13..d014580592aca 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.h @@ -39,4 +39,8 @@ void dcn314_enable_power_gating_plane(struct dce_hwseq *hws, bool enable); unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsigned int *k1_div, unsigned int *k2_div); +void dcn314_set_pixels_per_cycle(struct pipe_ctx *pipe_ctx); + +bool dcn314_is_dp_dig_pixel_rate_div_policy(struct pipe_ctx *pipe_ctx); + #endif /* __DC_HWSS_DCN314_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c index 9dc4a8d182520..fc2f93edf34ea 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c @@ -147,6 +147,8 @@ static const struct hwseq_private_funcs dcn314_private_funcs = { .set_shaper_3dlut = dcn20_set_shaper_3dlut, .setup_hpo_hw_control = dcn31_setup_hpo_hw_control, .calculate_dccg_k1_k2_values = dcn314_calculate_dccg_k1_k2_values, + .set_pixels_per_cycle = dcn314_set_pixels_per_cycle, + .is_dp_dig_pixel_rate_div_policy = dcn314_is_dp_dig_pixel_rate_div_policy, }; void dcn314_hw_sequencer_construct(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c index 64daa631497ee..ecd2e4446ea1f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c @@ -70,6 +70,7 @@ #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" #include "dml/dcn31/dcn31_fpu.h" +#include "dml/dcn314/dcn314_fpu.h" #include "dcn314/dcn314_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" @@ -132,155 +133,6 @@ static const struct IP_BASE DCN_BASE = { { { { 0x00000012, 0x000000C0, 0x000034C #define DC_LOGGER_INIT(logger) -#define DCN3_14_DEFAULT_DET_SIZE 384 -#define DCN3_14_MAX_DET_SIZE 384 -#define DCN3_14_MIN_COMPBUF_SIZE_KB 128 -#define DCN3_14_CRB_SEGMENT_SIZE_KB 64 -struct _vcs_dpi_ip_params_st dcn3_14_ip = { - .VBlankNomDefaultUS = 668, - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 1, - .hostvm_enable = 1, - .hostvm_max_page_table_levels = 2, - .rob_buffer_size_kbytes = 64, - .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1792, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 32, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 4, - .maximum_dsc_bits_per_component = 
10, - .dsc422_native_support = false, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 48, - .line_buffer_size_bits = 789504, - .max_line_buffer_lines = 12, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 46, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { - /*TODO: correct dispclk/dppclk voltage level determination*/ - .clock_limits = { - { - .state = 0, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 600.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 1, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 2, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 3, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 4, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, - }, - }, - .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 442.0, - .sr_enter_plus_exit_z8_time_us = 560.0, - .writeback_latency_us = 12.0, - .dram_channel_width_bytes = 4, - .round_trip_ping_latency_dcfclk_cycles = 106, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 80.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, - .max_avg_sdp_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_percent = 60.0, - .fabric_datapath_to_dcn_data_return_bytes = 32, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .gpuvm_min_page_size_bytes = 4096, - .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = false, - .urgent_latency_adjustment_fabric_clock_component_us = 0, - 
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0, -}; - enum dcn31_clk_src_array_id { DCN31_CLK_SRC_PLL0, DCN31_CLK_SRC_PLL1, @@ -1404,7 +1256,7 @@ static struct stream_encoder *dcn314_stream_encoder_create( int afmt_inst; /* Mapping of VPG, AFMT, DME register blocks to DIO block instance */ - if (eng_id <= ENGINE_ID_DIGF) { + if (eng_id < ENGINE_ID_DIGF) { vpg_inst = eng_id; afmt_inst = eng_id; } else @@ -1449,7 +1301,8 @@ static struct hpo_dp_stream_encoder *dcn31_hpo_dp_stream_encoder_create( * VPG[8] -> HPO_DP[2] * VPG[9] -> HPO_DP[3] */ - vpg_inst = hpo_dp_inst + 6; + //Uses offset index 5-8, but actually maps to vpg_inst 6-9 + vpg_inst = hpo_dp_inst + 5; /* Mapping of APG register blocks to HPO DP block instance: * APG[0] -> HPO_DP[0] @@ -1799,109 +1652,16 @@ static struct clock_source *dcn31_clock_source_create( return NULL; } -static bool is_dual_plane(enum surface_pixel_format format) -{ - return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; -} - static int dcn314_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, bool fast_validate) { - int i, pipe_cnt; - struct resource_context *res_ctx = &context->res_ctx; - struct pipe_ctx *pipe; - bool upscaled = false; - - dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing; - - if (!res_ctx->pipe_ctx[i].stream) - continue; - pipe = &res_ctx->pipe_ctx[i]; - timing = &pipe->stream->timing; - - if (dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min - && pipe->stream->adjust.v_total_min > timing->v_total) - pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min; - - if (pipe->plane_state && - (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || - pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width)) - upscaled = true; - - /* - * Immediate flip can be set dynamically after enabling the plane. - * We need to require support for immediate flip or underflow can be - * intermittently experienced depending on peak b/w requirements. 
- */ - pipes[pipe_cnt].pipe.src.immediate_flip = true; - - pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; - pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; - pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - pipes[pipe_cnt].pipe.src.dcc_rate = 3; - pipes[pipe_cnt].dout.dsc_input_bpc = 0; - - if (pipes[pipe_cnt].dout.dsc_enable) { - switch (timing->display_color_depth) { - case COLOR_DEPTH_888: - pipes[pipe_cnt].dout.dsc_input_bpc = 8; - break; - case COLOR_DEPTH_101010: - pipes[pipe_cnt].dout.dsc_input_bpc = 10; - break; - case COLOR_DEPTH_121212: - pipes[pipe_cnt].dout.dsc_input_bpc = 12; - break; - default: - ASSERT(0); - break; - } - } - - pipe_cnt++; - } - context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE; - - dc->config.enable_4to1MPC = false; - if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { - if (is_dual_plane(pipe->plane_state->format) - && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { - dc->config.enable_4to1MPC = true; - } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) { - /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; - pipes[0].pipe.src.unbounded_req_mode = true; - } - } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count - && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) { - context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64; - } else if (context->stream_count >= 3 && upscaled) { - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (!pipe->stream) - continue; + int pipe_cnt; - if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine && - pipe->stream->apply_seamless_boot_optimization) { - - if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) { - context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1; - break; - } - } - } + DC_FP_START(); + pipe_cnt = dcn314_populate_dml_pipes_from_context_fpu(dc, context, pipes, fast_validate); + DC_FP_END(); return pipe_cnt; } @@ -1912,88 +1672,9 @@ static struct dc_cap_funcs cap_funcs = { static void dcn314_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - struct clk_limit_table *clk_table = &bw_params->clk_table; - struct _vcs_dpi_voltage_scaling_st *clock_tmp = dcn3_14_soc._clock_tmp; - unsigned int i, closest_clk_lvl; - int max_dispclk_mhz = 0, max_dppclk_mhz = 0; - int j; - - // Default clock levels are used for diags, which may lead to overclocking. - if (!IS_DIAG_DC(dc->ctx->dce_environment)) { - - dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; - dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; - - if (bw_params->num_channels > 0) - dcn3_14_soc.num_chans = bw_params->num_channels; - - ASSERT(dcn3_14_soc.num_chans); - ASSERT(clk_table->num_entries); - - /* Prepass to find max clocks independent of voltage level. 
*/ - for (i = 0; i < clk_table->num_entries; ++i) { - if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; - if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; - } - - for (i = 0; i < clk_table->num_entries; i++) { - /* loop backwards*/ - for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) { - if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { - closest_clk_lvl = j; - break; - } - } - if (clk_table->num_entries == 1) { - /*smu gives one DPM level, let's take the highest one*/ - closest_clk_lvl = dcn3_14_soc.num_states - 1; - } - - clock_tmp[i].state = i; - - /* Clocks dependent on voltage level. */ - clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - if (clk_table->num_entries == 1 && - clock_tmp[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { - /*SMU fix not released yet*/ - clock_tmp[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; - } - clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - - if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio) - clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; - - /* Clocks independent of voltage level. */ - clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : - dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - - clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : - dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - - clock_tmp[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - clock_tmp[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - clock_tmp[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - clock_tmp[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - clock_tmp[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz; - } - for (i = 0; i < clk_table->num_entries; i++) - dcn3_14_soc.clock_limits[i] = clock_tmp[i]; - if (clk_table->num_entries) - dcn3_14_soc.num_states = clk_table->num_entries; - } - - if (max_dispclk_mhz) { - dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - } - - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31); - else - dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA); + DC_FP_START(); + dcn314_update_bw_bounding_box_fpu(dc, bw_params); + DC_FP_END(); } static struct resource_funcs dcn314_res_pool_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h index c41108847ce08..0dd3153aa5c17 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.h @@ -29,6 +29,9 @@ #include "core_types.h" +extern struct _vcs_dpi_ip_params_st dcn3_14_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc; + #define TO_DCN314_RES_POOL(pool)\ container_of(pool, struct dcn314_resource_pool, base) diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h index 39929fa67a510..22849eaa6f243 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h @@ -32,7 +32,6 @@ container_of(pool, struct dcn315_resource_pool, base) extern struct _vcs_dpi_ip_params_st dcn3_15_ip; -extern struct _vcs_dpi_ip_params_st dcn3_15_soc; struct dcn315_resource_pool { struct resource_pool base; diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h index 0dc5a6c13ae7d..aba6d634131b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h @@ -32,7 +32,6 @@ container_of(pool, struct dcn316_resource_pool, base) extern struct _vcs_dpi_ip_params_st dcn3_16_ip; -extern struct _vcs_dpi_ip_params_st dcn3_16_soc; struct dcn316_resource_pool { struct resource_pool base; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 554605ac3ba1a..bb815654dbd22 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -251,6 +251,7 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c uint32_t total_lines = 0; uint32_t lines_per_way = 0; uint32_t num_ways = 0; + uint32_t prev_addr_low = 0; for (i = 0; i < ctx->stream_count; i++) { stream = ctx->streams[i]; @@ -268,10 +269,20 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c plane = ctx->stream_status[i].plane_states[j]; // Calculate total surface size - surface_size = plane->plane_size.surface_pitch * + if (prev_addr_low != plane->address.grph.addr.u.low_part) { + /* if the plane address differs from the previous FB, then userspace allocated separate FBs */ + surface_size += plane->plane_size.surface_pitch * plane->plane_size.surface_size.height * (plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4); + prev_addr_low = plane->address.grph.addr.u.low_part; + } else { + /* We have the same fb for all the planes. + * Xorg always creates one giant fb that holds all surfaces, + * so allocating it once is sufficient. + */ + continue; + } // Convert surface size + starting address to number of cache lines required // (alignment accounted for) cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size, @@ -321,7 +332,9 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) { union dmub_rb_cmd cmd; - uint8_t ways; + uint8_t ways, i, j; + bool stereo_in_use = false; + struct dc_plane_state *plane = NULL; if (!dc->ctx->dmub_srv) return false; @@ -350,7 +363,23 @@ bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) * and configure HUBP's to fetch from MALL */ ways = dcn32_calculate_cab_allocation(dc, dc->current_state); - if (ways <= dc->caps.cache_num_ways) { + + /* MALL not supported with Stereo3D. If any plane is using stereo, + * don't try to enter MALL.
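+ * The scan below is deliberately exhaustive: it visits every plane of + * every stream in the current state, and the first stereo plane found + * prevents MALL from being used.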
+ */ + for (i = 0; i < dc->current_state->stream_count; i++) { + for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) { + plane = dc->current_state->stream_status[i].plane_states[j]; + + if (plane->address.type == PLN_ADDR_TYPE_GRPH_STEREO) { + stereo_in_use = true; + break; + } + } + if (stereo_in_use) + break; + } + if (ways <= dc->caps.cache_num_ways && !stereo_in_use) { memset(&cmd, 0, sizeof(cmd)); cmd.cab.header.type = DMUB_CMD__CAB_FOR_SS; cmd.cab.header.sub_type = DMUB_CMD__CAB_DCN_SS_FIT_IN_CAB; @@ -684,9 +713,11 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context) if (pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { hubp->funcs->hubp_update_mall_sel(hubp, 1, false); } else { + // MALL not supported with Stereo3D hubp->funcs->hubp_update_mall_sel(hubp, num_ways <= dc->caps.cache_num_ways && - pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED ? 2 : 0, + pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED && + pipe->plane_state->address.type != PLN_ADDR_TYPE_GRPH_STEREO ? 2 : 0, cache_cursor); } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index ca04bf3660d52..bd189802c7902 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -282,7 +282,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = { .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, .enable_optc_clock = optc1_enable_optc_clock, .set_vrr_m_const = optc3_set_vrr_m_const, - .set_drr = optc31_set_drr, // TODO: Update to optc32_set_drr once FW headers are promoted + .set_drr = optc32_set_drr, .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc3_set_vtotal_min_max, .set_static_screen_control = optc1_set_static_screen_control, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 60f130a84eebb..1a990e94070ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -869,7 +869,7 @@ static const struct dc_debug_options debug_defaults_drv = { } }, .use_max_lb = true, - .force_disable_subvp = true, + .force_disable_subvp = false, .exit_idle_opt_for_cursor_updates = true, .enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, @@ -2061,6 +2061,7 @@ static bool dcn32_resource_construct( dc->caps.max_cab_allocation_bytes = 67108864; // 64MB = 1024 * 1024 * 64 dc->caps.subvp_fw_processing_delay_us = 15; dc->caps.subvp_prefetch_end_to_mall_start_us = 15; + dc->caps.subvp_swath_height_margin_lines = 16; dc->caps.subvp_pstate_allow_width_us = 20; dc->caps.subvp_vertical_int_margin_us = 30; diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index 7af19db015143..ef1eee2e54d9a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -868,7 +868,7 @@ static const struct dc_debug_options debug_defaults_drv = { } }, .use_max_lb = true, - .force_disable_subvp = true, + .force_disable_subvp = false, .exit_idle_opt_for_cursor_updates = true, .enable_single_display_2to1_odm_policy = true, .enable_dp_dig_pixel_rate_div_policy = 1, @@ -1667,6 +1667,7 @@ static bool dcn321_resource_construct( dc->caps.max_cab_allocation_bytes = 33554432; // 32MB = 
1024 * 1024 * 32 dc->caps.subvp_fw_processing_delay_us = 15; dc->caps.subvp_prefetch_end_to_mall_start_us = 15; + dc->caps.subvp_swath_height_margin_lines = 16; dc->caps.subvp_pstate_allow_width_us = 20; dc->caps.max_slave_planes = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index cc73c9763e666..3469dd25fcc10 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -59,7 +59,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_ccflags) @@ -69,6 +68,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_ccflags) $(frame_warn_flag) @@ -80,7 +80,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare @@ -129,6 +128,7 @@ DML += dcn321/dcn321_fpu.o DML += dcn301/dcn301_fpu.o DML += dcn302/dcn302_fpu.o DML += dcn303/dcn303_fpu.o +DML += dcn314/dcn314_fpu.o DML += dsc/rc_calc_fpu.o DML += calcs/dcn_calcs.o calcs/dcn_calc_math.o calcs/dcn_calc_auto.o endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index ca44df4fca747..d680f1c5b69f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -30,6 +30,7 @@ #include "dchubbub.h" #include "dcn20/dcn20_resource.h" #include "dcn21/dcn21_resource.h" +#include "clk_mgr/dcn21/rn_clk_mgr.h" #include "dcn20_fpu.h" @@ -2233,6 +2234,7 @@ static struct _vcs_dpi_voltage_scaling_st construct_low_pstate_lvl(struct clk_li void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { + struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool); struct clk_limit_table *clk_table = &bw_params->clk_table; unsigned int i, closest_clk_lvl = 0, k = 0; @@ -2246,8 +2248,7 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params 
ASSERT(clk_table->num_entries); /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */ - memcpy(&dcn2_1_soc._clock_tmp, &dcn2_1_soc.clock_limits, - sizeof(dcn2_1_soc.clock_limits)); + memcpy(s, dcn2_1_soc.clock_limits, sizeof(dcn2_1_soc.clock_limits)); for (i = 0; i < clk_table->num_entries; i++) { /* loop backwards*/ @@ -2262,25 +2263,25 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params if (i == 1) k++; - dcn2_1_soc._clock_tmp[k].state = k; - dcn2_1_soc._clock_tmp[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - dcn2_1_soc._clock_tmp[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn2_1_soc._clock_tmp[k].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn2_1_soc._clock_tmp[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; - - dcn2_1_soc._clock_tmp[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn2_1_soc._clock_tmp[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - dcn2_1_soc._clock_tmp[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn2_1_soc._clock_tmp[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn2_1_soc._clock_tmp[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn2_1_soc._clock_tmp[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn2_1_soc._clock_tmp[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + s[k].state = k; + s[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + s[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + s[k].socclk_mhz = clk_table->entries[i].socclk_mhz; + s[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; + + s[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + s[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + s[k].dram_bw_per_chan_gbps = + dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + s[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + s[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + s[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + s[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; k++; } - memcpy(&dcn2_1_soc.clock_limits, &dcn2_1_soc._clock_tmp, - sizeof(dcn2_1_soc.clock_limits)); + memcpy(dcn2_1_soc.clock_limits, s, sizeof(dcn2_1_soc.clock_limits)); if (clk_table->num_entries) { dcn2_1_soc.num_states = clk_table->num_entries + 1; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index 7ef66e511ec8e..241d28d0b7fb1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -26,6 +26,7 @@ #include "clk_mgr.h" #include "dcn20/dcn20_resource.h" #include "dcn301/dcn301_resource.h" +#include "clk_mgr/dcn301/vg_clk_mgr.h" #include "dml/dcn20/dcn20_fpu.h" #include "dcn301_fpu.h" @@ -321,6 +322,7 @@ static void calculate_wm_set_for_vlevel(int vlevel, void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { + struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct dcn301_resource_pool *pool = TO_DCN301_RES_POOL(dc->res_pool); struct clk_limit_table *clk_table = &bw_params->clk_table; unsigned int i, closest_clk_lvl; @@ -328,8 +330,7 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params 
*bw_param dc_assert_fp_enabled(); - memcpy(&dcn3_01_soc._clock_tmp, &dcn3_01_soc.clock_limits, - sizeof(dcn3_01_soc.clock_limits)); + memcpy(s, dcn3_01_soc.clock_limits, sizeof(dcn3_01_soc.clock_limits)); /* Default clock levels are used for diags, which may lead to overclocking. */ if (!IS_DIAG_DC(dc->ctx->dce_environment)) { @@ -347,31 +348,33 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param } } - dcn3_01_soc._clock_tmp[i].state = i; - dcn3_01_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - dcn3_01_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn3_01_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn3_01_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; - - dcn3_01_soc._clock_tmp[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn3_01_soc._clock_tmp[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - dcn3_01_soc._clock_tmp[i].dram_bw_per_chan_gbps = dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn3_01_soc._clock_tmp[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn3_01_soc._clock_tmp[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn3_01_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn3_01_soc._clock_tmp[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + s[i].state = i; + s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + s[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; + + s[i].dispclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + s[i].dppclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + s[i].dram_bw_per_chan_gbps = + dcn3_01_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + s[i].dscclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + s[i].dtbclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + s[i].phyclk_d18_mhz = + dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + s[i].phyclk_mhz = dcn3_01_soc.clock_limits[closest_clk_lvl].phyclk_mhz; } if (clk_table->num_entries) { dcn3_01_soc.num_states = clk_table->num_entries; /* duplicate last level */ - dcn3_01_soc._clock_tmp[dcn3_01_soc.num_states] = dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1]; - dcn3_01_soc._clock_tmp[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states; + s[dcn3_01_soc.num_states] = + dcn3_01_soc.clock_limits[dcn3_01_soc.num_states - 1]; + s[dcn3_01_soc.num_states].state = dcn3_01_soc.num_states; } } - memcpy(&dcn3_01_soc.clock_limits, &dcn3_01_soc._clock_tmp, - sizeof(dcn3_01_soc.clock_limits)); + memcpy(dcn3_01_soc.clock_limits, s, sizeof(dcn3_01_soc.clock_limits)); dcn3_01_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c index e36cfa5985ea9..0e62eb823e343 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -25,6 +25,9 @@ #include "resource.h" #include "clk_mgr.h" +#include "dcn31/dcn31_resource.h" +#include "dcn315/dcn315_resource.h" +#include "dcn316/dcn316_resource.h" #include "dml/dcn20/dcn20_fpu.h" 
#include "dcn31_fpu.h" @@ -114,7 +117,7 @@ struct _vcs_dpi_ip_params_st dcn3_1_ip = { .dcc_supported = true, }; -struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { +static struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { /*TODO: correct dispclk/dppclk voltage level determination*/ .clock_limits = { { @@ -259,7 +262,7 @@ struct _vcs_dpi_ip_params_st dcn3_15_ip = { .dcc_supported = true, }; -struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { +static struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { .sr_exit_time_us = 9.0, .sr_enter_plus_exit_time_us = 11.0, .sr_exit_z8_time_us = 50.0, @@ -355,7 +358,7 @@ struct _vcs_dpi_ip_params_st dcn3_16_ip = { .dcc_supported = true, }; -struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { +static struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { /*TODO: correct dispclk/dppclk voltage level determination*/ .clock_limits = { { @@ -594,14 +597,14 @@ void dcn31_calculate_wm_and_dlg_fp( void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { + struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct clk_limit_table *clk_table = &bw_params->clk_table; unsigned int i, closest_clk_lvl; int j; dc_assert_fp_enabled(); - memcpy(&dcn3_1_soc._clock_tmp, &dcn3_1_soc.clock_limits, - sizeof(dcn3_1_soc.clock_limits)); + memcpy(s, dcn3_1_soc.clock_limits, sizeof(dcn3_1_soc.clock_limits)); // Default clock levels are used for diags, which may lead to overclocking. if (!IS_DIAG_DC(dc->ctx->dce_environment)) { @@ -630,34 +633,36 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params } } - dcn3_1_soc._clock_tmp[i].state = i; + s[i].state = i; /* Clocks dependent on voltage level. */ - dcn3_1_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - dcn3_1_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn3_1_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn3_1_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + s[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * + 2 * clk_table->entries[i].wck_ratio; /* Clocks independent of voltage level. */ - dcn3_1_soc._clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn3_1_soc._clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + s[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - dcn3_1_soc._clock_tmp[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn3_1_soc._clock_tmp[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn3_1_soc._clock_tmp[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn3_1_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn3_1_soc._clock_tmp[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + s[i].dram_bw_per_chan_gbps = + dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + s[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + s[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + s[i].phyclk_d18_mhz = + dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + s[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; } if (clk_table->num_entries) { dcn3_1_soc.num_states = clk_table->num_entries; } } - memcpy(&dcn3_1_soc.clock_limits, &dcn3_1_soc._clock_tmp, - sizeof(dcn3_1_soc.clock_limits)); + memcpy(dcn3_1_soc.clock_limits, s, sizeof(dcn3_1_soc.clock_limits)); dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; @@ -724,6 +729,7 @@ void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { + struct _vcs_dpi_voltage_scaling_st *s = dc->scratch.update_bw_bounding_box.clock_limits; struct clk_limit_table *clk_table = &bw_params->clk_table; unsigned int i, closest_clk_lvl; int max_dispclk_mhz = 0, max_dppclk_mhz = 0; @@ -731,8 +737,7 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dc_assert_fp_enabled(); - memcpy(&dcn3_16_soc._clock_tmp, &dcn3_16_soc.clock_limits, - sizeof(dcn3_16_soc.clock_limits)); + memcpy(s, dcn3_16_soc.clock_limits, sizeof(dcn3_16_soc.clock_limits)); // Default clock levels are used for diags, which may lead to overclocking. if (!IS_DIAG_DC(dc->ctx->dce_environment)) { @@ -754,7 +759,8 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param for (i = 0; i < clk_table->num_entries; i++) { /* loop backwards*/ for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) { - if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= + clk_table->entries[i].dcfclk_mhz) { closest_clk_lvl = j; break; } @@ -765,39 +771,43 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param closest_clk_lvl = dcn3_16_soc.num_states - 1; } - dcn3_16_soc._clock_tmp[i].state = i; + s[i].state = i; /* Clocks dependent on voltage level. 
*/ - dcn3_16_soc._clock_tmp[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + s[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; if (clk_table->num_entries == 1 && - dcn3_16_soc._clock_tmp[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { + s[i].dcfclk_mhz < + dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { /*SMU fix not released yet*/ - dcn3_16_soc._clock_tmp[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; + s[i].dcfclk_mhz = + dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; } - dcn3_16_soc._clock_tmp[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn3_16_soc._clock_tmp[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn3_16_soc._clock_tmp[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + s[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + s[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + s[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * + 2 * clk_table->entries[i].wck_ratio; /* Clocks independent of voltage level. */ - dcn3_16_soc._clock_tmp[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + s[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn3_16_soc._clock_tmp[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + s[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - dcn3_16_soc._clock_tmp[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn3_16_soc._clock_tmp[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn3_16_soc._clock_tmp[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn3_16_soc._clock_tmp[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn3_16_soc._clock_tmp[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + s[i].dram_bw_per_chan_gbps = + dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + s[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + s[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + s[i].phyclk_d18_mhz = + dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + s[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; } if (clk_table->num_entries) { dcn3_16_soc.num_states = clk_table->num_entries; } } - memcpy(&dcn3_16_soc.clock_limits, &dcn3_16_soc._clock_tmp, - sizeof(dcn3_16_soc.clock_limits)); + memcpy(dcn3_16_soc.clock_limits, s, sizeof(dcn3_16_soc.clock_limits)); if (max_dispclk_mhz) { dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c new file mode 100644 index 0000000000000..c80307a6af1bf --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "clk_mgr.h" +#include "resource.h" +#include "dcn31/dcn31_hubbub.h" +#include "dcn314_fpu.h" +#include "dml/dcn20/dcn20_fpu.h" +#include "dml/display_mode_vba.h" + +struct _vcs_dpi_ip_params_st dcn3_14_ip = { + .VBlankNomDefaultUS = 668, + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1792, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 4, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 48, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = { + /*TODO: correct dispclk/dppclk voltage level determination*/ + .clock_limits = { + { + .state = 
0, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 600.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 186.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 1, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 2, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 3, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 371.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 4, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, +}; + + +void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct clk_limit_table *clk_table = &bw_params->clk_table; + struct _vcs_dpi_voltage_scaling_st *clock_limits = + dcn3_14_soc.clock_limits; + unsigned int i, closest_clk_lvl; + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; + int j; + + dc_assert_fp_enabled(); + + // Default clock levels are used for diags, which may lead to overclocking. + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + + dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; + + if (bw_params->num_channels > 0) + dcn3_14_soc.num_chans = bw_params->num_channels; + + ASSERT(dcn3_14_soc.num_chans); + ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. 
*/ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_14_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn3_14_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + if (clk_table->num_entries == 1) { + /*smu gives one DPM level, let's take the highest one*/ + closest_clk_lvl = dcn3_14_soc.num_states - 1; + } + + clock_limits[i].state = i; + + /* Clocks dependent on voltage level. */ + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + if (clk_table->num_entries == 1 && + clock_limits[i].dcfclk_mhz < dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { + /*SMU fix not released yet*/ + clock_limits[i].dcfclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; + } + clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + + if (clk_table->entries[i].memclk_mhz && clk_table->entries[i].wck_ratio) + clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_14_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz : + dcn3_14_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + clock_limits[i].dram_bw_per_chan_gbps = dcn3_14_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + clock_limits[i].dscclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + clock_limits[i].dtbclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + clock_limits[i].phyclk_d18_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + clock_limits[i].phyclk_mhz = dcn3_14_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) + dcn3_14_soc.clock_limits[i] = clock_limits[i]; + if (clk_table->num_entries) { + dcn3_14_soc.num_states = clk_table->num_entries; + } + } + + if (max_dispclk_mhz) { + dcn3_14_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + } + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31); + else + dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN31_FPGA); +} + +static bool is_dual_plane(enum surface_pixel_format format) +{ + return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; +} + +int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate) +{ + int i, pipe_cnt; + struct resource_context *res_ctx = &context->res_ctx; + struct pipe_ctx *pipe; + bool upscaled = false; + + dc_assert_fp_enabled(); + + dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); + + for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_crtc_timing *timing; + + if (!res_ctx->pipe_ctx[i].stream) + continue; + pipe = &res_ctx->pipe_ctx[i]; + timing = &pipe->stream->timing; + + if 
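/*
 * For reference, the bounding-box update above maps each SMU-reported
 * clock-table entry onto the nearest hardcoded DML state: it scans the SoC
 * states from highest to lowest and takes the first whose DCFCLK fits.
 * Voltage-dependent clocks (DCFCLK, FCLK, SOCCLK) then come from the SMU
 * entry, while voltage-independent clocks (DISPCLK, DPPCLK) are flattened
 * to the prepass maximums. A standalone sketch of the mapping with
 * simplified types (illustrative only):
 *
 *   // Index of the highest state whose dcfclk does not exceed the SMU
 *   // entry; falls back to state 0 when none fits.
 *   static unsigned int closest_state(const unsigned int *soc_dcfclk_mhz,
 *                                     int num_states,
 *                                     unsigned int entry_dcfclk_mhz)
 *   {
 *       int j;
 *
 *       for (j = num_states - 1; j >= 0; j--)
 *           if (soc_dcfclk_mhz[j] <= entry_dcfclk_mhz)
 *               return j;
 *       return 0;
 *   }
 */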
(dc_extended_blank_supported(dc) && pipe->stream->adjust.v_total_max == pipe->stream->adjust.v_total_min + && pipe->stream->adjust.v_total_min > timing->v_total) + pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min; + + if (pipe->plane_state && + (pipe->plane_state->src_rect.height < pipe->plane_state->dst_rect.height || + pipe->plane_state->src_rect.width < pipe->plane_state->dst_rect.width)) + upscaled = true; + + /* + * Immediate flip can be set dynamically after enabling the plane. + * We need to require support for immediate flip or underflow can be + * intermittently experienced depending on peak b/w requirements. + */ + pipes[pipe_cnt].pipe.src.immediate_flip = true; + + pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; + pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; + pipes[pipe_cnt].pipe.src.gpuvm = true; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; + pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; + pipes[pipe_cnt].pipe.src.dcc_rate = 3; + pipes[pipe_cnt].dout.dsc_input_bpc = 0; + + if (pipes[pipe_cnt].dout.dsc_enable) { + switch (timing->display_color_depth) { + case COLOR_DEPTH_888: + pipes[pipe_cnt].dout.dsc_input_bpc = 8; + break; + case COLOR_DEPTH_101010: + pipes[pipe_cnt].dout.dsc_input_bpc = 10; + break; + case COLOR_DEPTH_121212: + pipes[pipe_cnt].dout.dsc_input_bpc = 12; + break; + default: + ASSERT(0); + break; + } + } + + pipe_cnt++; + } + context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_14_DEFAULT_DET_SIZE; + + dc->config.enable_4to1MPC = false; + if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { + if (is_dual_plane(pipe->plane_state->format) + && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { + dc->config.enable_4to1MPC = true; + } else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) { + /* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */ + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + pipes[0].pipe.src.unbounded_req_mode = true; + } + } else if (context->stream_count >= dc->debug.crb_alloc_policy_min_disp_count + && dc->debug.crb_alloc_policy > DET_SIZE_DEFAULT) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = dc->debug.crb_alloc_policy * 64; + } else if (context->stream_count >= 3 && upscaled) { + context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; + + if (!pipe->stream) + continue; + + if (pipe->stream->signal == SIGNAL_TYPE_EDP && dc->debug.seamless_boot_odm_combine && + pipe->stream->apply_seamless_boot_optimization) { + + if (pipe->stream->apply_boot_odm_mode == dm_odm_combine_policy_2to1) { + context->bw_ctx.dml.vba.ODMCombinePolicy = dm_odm_combine_policy_2to1; + break; + } + } + } + + return pipe_cnt; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h new file mode 100644 index 0000000000000..d32c5bb99f4c9 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2022 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN314_FPU_H__ +#define __DCN314_FPU_H__ + +#define DCN3_14_DEFAULT_DET_SIZE 384 +#define DCN3_14_MAX_DET_SIZE 384 +#define DCN3_14_MIN_COMPBUF_SIZE_KB 128 +#define DCN3_14_CRB_SEGMENT_SIZE_KB 64 + +void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params); +int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + bool fast_validate); + +#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 02c6e06b08aaa..a51e74344698e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -473,8 +473,11 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, // DML calculation for MALL region doesn't take into account FW delay // and required pstate allow width for multi-display cases + /* Add 16 lines margin to the MALL REGION because SUB_VP_START_LINE must be aligned + * to 2 swaths (i.e. 16 lines) + */ phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) + - pstate_width_fw_delay_lines; + pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines; // For backporch of phantom pipe, use vstartup of the main pipe phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); @@ -983,9 +986,15 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, * DML favors voltage over p-state, but we're more interested in * supporting p-state over voltage. We can't support p-state in * prefetch mode > 0 so try capping the prefetch mode to start. + * Override present for testing. */ - context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + if (dc->debug.dml_disallow_alternate_prefetch_modes) + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = dm_prefetch_support_uclk_fclk_and_stutter; + else + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = + dm_prefetch_support_uclk_fclk_and_stutter_if_possible; + *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); /* This may adjust vlevel and maxMpcComb */ if (*vlevel < context->bw_ctx.dml.soc.num_states) @@ -1014,7 +1023,9 @@ static void dcn32_full_validate_bw_helper(struct dc *dc, * will not allow for switch in VBLANK. 
The DRR display must have it's VBLANK stretched * enough to support MCLK switching. */ - if (*vlevel == context->bw_ctx.dml.soc.num_states) { + if (*vlevel == context->bw_ctx.dml.soc.num_states && + context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final == + dm_prefetch_support_uclk_fclk_and_stutter) { context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = dm_prefetch_support_stutter; /* There are params (such as FabricClock) that need to be recalculated @@ -1335,7 +1346,8 @@ bool dcn32_internal_validate_bw( int split[MAX_PIPES] = { 0 }; bool merge[MAX_PIPES] = { false }; bool newly_split[MAX_PIPES] = { false }; - int pipe_cnt, i, pipe_idx, vlevel; + int pipe_cnt, i, pipe_idx; + int vlevel = context->bw_ctx.dml.soc.num_states; struct vba_vars_st *vba = &context->bw_ctx.dml.vba; ASSERT(pipes); @@ -1362,17 +1374,22 @@ bool dcn32_internal_validate_bw( DC_FP_END(); } - if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { + if (fast_validate || + (dc->debug.dml_disallow_alternate_prefetch_modes && + (vlevel == context->bw_ctx.dml.soc.num_states || + vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported))) { /* - * If mode is unsupported or there's still no p-state support then - * fall back to favoring voltage. + * If dml_disallow_alternate_prefetch_modes is false, then we have already + * tried alternate prefetch modes during full validation. + * + * If mode is unsupported or there is no p-state support, then + * fall back to favouring voltage. * * We don't actually support prefetch mode 2, so require that we * at least support prefetch mode 1. */ context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final = - dm_prefetch_support_stutter; + dm_prefetch_support_stutter; vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); if (vlevel < context->bw_ctx.dml.soc.num_states) { @@ -2082,6 +2099,13 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; } + if ((int)(dcn3_2_soc.fclk_change_latency_us * 1000) + != dc->bb_overrides.fclk_clock_change_latency_ns + && dc->bb_overrides.fclk_clock_change_latency_ns) { + dcn3_2_soc.fclk_change_latency_us = + dc->bb_overrides.fclk_clock_change_latency_ns / 1000; + } + if ((int)(dcn3_2_soc.dummy_pstate_latency_us * 1000) != dc->bb_overrides.dummy_clock_change_latency_ns && dc->bb_overrides.dummy_clock_change_latency_ns) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index e9204c711cb96..cb2025771646b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -65,6 +65,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman int iteration; double MaxTotalRDBandwidth; unsigned int NextPrefetchMode; + double MaxTotalRDBandwidthNoUrgentBurst = 0.0; + bool DestinationLineTimesForPrefetchLessThan2 = false; + bool VRatioPrefetchMoreThanMax = false; + double TWait; + double TotalWRBandwidth = 0; + double WRBandwidth = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: --- START ---\n", __func__); @@ -710,11 +716,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman NextPrefetchMode = 
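/*
 * For reference, the dcn32_fpu.c hunks above make the strict prefetch
 * requirement conditional. When dml_disallow_alternate_prefetch_modes is
 * set, DML is pinned to the strictest mode and the driver steps down to
 * dm_prefetch_support_stutter by hand; otherwise the "_if_possible"
 * variant lets DML relax the requirement internally, so the manual
 * fallback is skipped. A reduced sketch of the decision (control flow
 * only, abbreviating context->bw_ctx.dml.soc to soc):
 *
 *   if (dc->debug.dml_disallow_alternate_prefetch_modes)
 *       soc->allow_for_pstate_or_stutter_in_vblank_final =
 *               dm_prefetch_support_uclk_fclk_and_stutter;
 *   else
 *       soc->allow_for_pstate_or_stutter_in_vblank_final =
 *               dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
 *
 * This is also why vlevel is now pre-initialized to soc.num_states in
 * dcn32_internal_validate_bw(): the "unsupported" fallback path only
 * triggers when the strict mode was actually in force.
 */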
mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb]; do { - double MaxTotalRDBandwidthNoUrgentBurst = 0.0; - bool DestinationLineTimesForPrefetchLessThan2 = false; - bool VRatioPrefetchMoreThanMax = false; - double dummy_unit_vector[DC__NUM_DPP__MAX]; - MaxTotalRDBandwidth = 0; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines); @@ -723,41 +724,39 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* NOTE PerfetchMode variable is invalid in DAL as per the input received. * Hence the direction is to use PrefetchModePerState. */ - double TWait = dml32_CalculateTWait( - mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], - mode_lib->vba.UsesMALLForPStateChange[k], - mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, - mode_lib->vba.DRRDisplay[k], - mode_lib->vba.DRAMClockChangeLatency, - mode_lib->vba.FCLKChangeLatency, v->UrgentLatency, - mode_lib->vba.SREnterPlusExitTime); - - DmlPipe myPipe; - - myPipe.Dppclk = mode_lib->vba.DPPCLK[k]; - myPipe.Dispclk = mode_lib->vba.DISPCLK; - myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep; - myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; - myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; - myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; - myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; - myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; - myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; - myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; - myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; - myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; - myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; - myPipe.HTotal = mode_lib->vba.HTotal[k]; - myPipe.HActive = mode_lib->vba.HActive[k]; - myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; - myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k]; - myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; - myPipe.BytePerPixelY = v->BytePerPixelY[k]; - myPipe.BytePerPixelC = v->BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; + TWait = dml32_CalculateTWait( + mode_lib->vba.PrefetchModePerState[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb], + mode_lib->vba.UsesMALLForPStateChange[k], + mode_lib->vba.SynchronizeDRRDisplaysForUCLKPStateChangeFinal, + mode_lib->vba.DRRDisplay[k], + mode_lib->vba.DRAMClockChangeLatency, + mode_lib->vba.FCLKChangeLatency, v->UrgentLatency, + mode_lib->vba.SREnterPlusExitTime); + + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dppclk = mode_lib->vba.DPPCLK[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.Dispclk = mode_lib->vba.DISPCLK; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCFClkDeepSleep = v->DCFCLKDeepSleep; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DPPPerSurface = mode_lib->vba.DPPPerPlane[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ScalerEnabled = 
mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ODMMode = mode_lib->vba.ODMCombineEnabled[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelY = v->BytePerPixelY[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.BytePerPixelC = v->BytePerPixelC[k]; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; v->ErrorResult[k] = dml32_CalculatePrefetchSchedule(v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.HostVMInefficiencyFactor, - &myPipe, v->DSCDelay[k], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.myPipe, v->DSCDelay[k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, mode_lib->vba.DPPCLKDelaySCL, mode_lib->vba.DPPCLKDelaySCLLBOnly, @@ -906,8 +905,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #endif { - double dummy_single[1]; - dml32_CalculatePrefetchBandwithSupport( mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, @@ -931,16 +928,14 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* output */ &MaxTotalRDBandwidth, - &dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], &v->PrefetchModeSupported); } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; 
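/*
 * The churn in this function follows one pattern: former stack locals
 * (myPipe, dummy_single[], dummy_unit_vector[], mmSOCParameters, ...)
 * become fields of a preallocated scratch area, v->dummy_vars.<function>.
 * DML runs inside kernel_fpu_begin()/end() sections with deep call
 * chains, where large frames risk stack overflow, so helper temporaries
 * are moved off the stack. The shape of the refactor as a self-contained
 * sketch (hypothetical types, not the driver's structures):
 *
 *   struct scratch {               // allocated once with the context
 *       double unit_vector[8];
 *       double singles[2];
 *   };
 *
 *   struct ctx {
 *       struct scratch dummy_vars; // long-lived, not on the stack
 *   };
 *
 *   static double sum_units(struct ctx *c, int n)  // n capped at 8
 *   {
 *       // Before: "double unit_vector[8];" here would grow the frame of
 *       // every invocation; after: reuse the context's copy.
 *       double *uv = c->dummy_vars.unit_vector;
 *       double s = 0.0;
 *       int k;
 *
 *       for (k = 0; k < n; k++) {
 *           uv[k] = 1.0;
 *           s += uv[k];
 *       }
 *       return s;
 *   }
 *
 * The cost is verbosity at each use site; the benefit is a bounded frame
 * no matter how many temporaries the calculation needs.
 */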
++k) - dummy_unit_vector[k] = 1.0; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector[k] = 1.0; { - double dummy_single[1]; - bool dummy_boolean[1]; dml32_CalculatePrefetchBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, v->NoUrgentLatencyHidingPre, @@ -954,17 +949,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw_pre, v->prefetch_vmrow_bw, mode_lib->vba.DPPPerPlane, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, /* output */ - &dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], &v->FractionOfUrgentBandwidth, - &dummy_boolean[0]); + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); } if (VRatioPrefetchMoreThanMax != false || DestinationLineTimesForPrefetchLessThan2 != false) { @@ -1047,8 +1042,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman } { - double dummy_single[2]; - bool dummy_boolean[1]; dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.ReturnBW, mode_lib->vba.ImmediateFlipRequirement, @@ -1072,7 +1065,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* output */ &v->total_dcn_read_bw_with_flip, // Single *TotalBandwidth - &dummy_single[0], // Single *FractionOfUrgentBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], // Single *FractionOfUrgentBandwidth &v->ImmediateFlipSupported); // Boolean *ImmediateFlipBandwidthSupport dml32_CalculateImmediateFlipBandwithSupport(mode_lib->vba.NumberOfActiveSurfaces, @@ -1089,17 +1082,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw_pre, v->prefetch_vmrow_bw, mode_lib->vba.DPPPerPlane, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, - dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, + 
v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_unit_vector, /* output */ - &dummy_single[1], // Single *TotalBandwidth + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], // Single *TotalBandwidth &v->FractionOfUrgentBandwidthImmediateFlip, // Single *FractionOfUrgentBandwidth - &dummy_boolean[0]); // Boolean *ImmediateFlipBandwidthSupport + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); // Boolean *ImmediateFlipBandwidthSupport } for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { @@ -1157,22 +1150,17 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman //Watermarks and NB P-State/DRAM Clock Change Support { - SOCParametersList mmSOCParameters; - enum clock_change_support dummy_dramchange_support; - enum dm_fclock_change_support dummy_fclkchange_support; - bool dummy_USRRetrainingSupport; - - mmSOCParameters.UrgentLatency = v->UrgentLatency; - mmSOCParameters.ExtraLatency = v->UrgentExtraLatency; - mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; - mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; - mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; - mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; - mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; - mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; - mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; - mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; - mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.UrgentLatency = v->UrgentLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.ExtraLatency = v->UrgentExtraLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.WritebackLatency = mode_lib->vba.WritebackLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.DRAMClockChangeLatency = mode_lib->vba.DRAMClockChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.FCLKChangeLatency = mode_lib->vba.FCLKChangeLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitTime = mode_lib->vba.SRExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitTime = mode_lib->vba.SREnterPlusExitTime; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SRExitZ8Time = mode_lib->vba.SRExitZ8Time; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->vba.SREnterPlusExitZ8Time; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.USRRetrainingLatency = mode_lib->vba.USRRetrainingLatency; + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters.SMNLatency = mode_lib->vba.SMNLatency; 
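/*
 * mmSOCParameters above simply bundles the SoC latency inputs (urgent and
 * extra latency, writeback latency, DRAM/FCLK change latency, SR and Z8
 * enter/exit times, USR retraining, SMN) into one struct, so the watermark
 * routine below takes a single parameter block rather than a dozen
 * scalars; the refactor only relocates that block into the scratch area.
 * The same idiom in miniature (hypothetical names, toy arithmetic):
 *
 *   struct soc_latencies {
 *       double urgent_us;
 *       double sr_exit_us;
 *       double dram_change_us;
 *   };
 *
 *   // One struct argument keeps every call site stable as inputs grow.
 *   static double pessimistic_watermark_us(const struct soc_latencies *l)
 *   {
 *       return l->urgent_us + l->sr_exit_us + l->dram_change_us;
 *   }
 */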
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( mode_lib->vba.USRRetrainingRequiredFinal, @@ -1190,7 +1178,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->dpte_group_bytes, v->meta_row_height, v->meta_row_height_chroma, - mmSOCParameters, + v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.mmSOCParameters, mode_lib->vba.WritebackChunkSize, mode_lib->vba.SOCCLK, v->DCFCLKDeepSleep, @@ -1227,12 +1215,12 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman /* Output */ &v->Watermark, - &dummy_dramchange_support, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_dramchange_support, v->MaxActiveDRAMClockChangeLatencySupported, v->SubViewportLinesNeededInMALL, - &dummy_fclkchange_support, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_fclkchange_support, &v->MinActiveFCLKChangeLatencySupported, - &dummy_USRRetrainingSupport, + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_USRRetrainingSupport, mode_lib->vba.ActiveDRAMClockChangeLatencyMargin); /* DCN32 has a new struct Watermarks (typedef) which is used to store @@ -1494,9 +1482,6 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman { //Maximum Bandwidth Used - double TotalWRBandwidth = 0; - double WRBandwidth = 0; - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.WritebackEnable[k] == true && mode_lib->vba.WritebackPixelFormat[k] == dm_444_32) { @@ -1590,9 +1575,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #ifdef __DML_VBA_ALLOW_DELTA__ { - double dummy_single[2]; unsigned int dummy_integer[1]; - bool dummy_boolean[1]; // Calculate z8 stutter eff assuming 0 reserved space dml32_CalculateStutterEfficiency(v->CompressedBufferSizeInkByte, @@ -1645,14 +1628,14 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->meta_row_bw, v->dpte_row_bw, /* Output */ - &dummy_single[0], - &dummy_single[1], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[0], + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_single[1], &dummy_integer[0], &v->Z8StutterEfficiencyNotIncludingVBlankBestCase, &v->Z8StutterEfficiencyBestCase, &v->Z8NumberOfStutterBurstsPerFrameBestCase, &v->StutterPeriodBestCase, - &dummy_boolean[0]); + &v->dummy_vars.DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation.dummy_boolean); } #else v->Z8StutterEfficiencyNotIncludingVBlankBestCase = v->Z8StutterEfficiencyNotIncludingVBlank; @@ -1666,34 +1649,89 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman #endif } +static void mode_support_configuration(struct vba_vars_st *v, + struct display_mode_lib *mode_lib) +{ + int i, j; + + for (i = v->soc.num_states - 1; i >= 0; i--) { + for (j = 0; j < 2; j++) { + if (mode_lib->vba.ScaleRatioAndTapsSupport == true + && mode_lib->vba.SourceFormatPixelAndScanSupport == true + && mode_lib->vba.ViewportSizeSupport[i][j] == true + && !mode_lib->vba.LinkRateDoesNotMatchDPVersion + && !mode_lib->vba.LinkRateForMultistreamNotIndicated + && !mode_lib->vba.BPPForMultistreamNotIndicated + && !mode_lib->vba.MultistreamWithHDMIOreDP + && 
!mode_lib->vba.ExceededMultistreamSlots[i] + && !mode_lib->vba.MSOOrODMSplitWithNonDPLink + && !mode_lib->vba.NotEnoughLanesForMSO + && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 + && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP + && !mode_lib->vba.DSC422NativeNotSupported + && !mode_lib->vba.MPCCombineMethodIncompatible + && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true + && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true + && mode_lib->vba.NotEnoughDSCUnits[i] == false + && !mode_lib->vba.NotEnoughDSCSlices[i] + && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe + && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen + && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false + && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] + && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false + && !mode_lib->vba.InvalidCombinationOfMALLUseForPState + && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified + && mode_lib->vba.ROBSupport[i][j] == true + && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true + && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true + && mode_lib->vba.NumberOfOTGSupport == true + && mode_lib->vba.NumberOfHDMIFRLSupport == true + && mode_lib->vba.EnoughWritebackUnits == true + && mode_lib->vba.WritebackLatencySupport == true + && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true + && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true + && mode_lib->vba.ViewportExceedsSurface == false + && mode_lib->vba.PrefetchSupported[i][j] == true + && mode_lib->vba.VActiveBandwithSupport[i][j] == true + && mode_lib->vba.DynamicMetadataSupported[i][j] == true + && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true + && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true + && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true + && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true + && mode_lib->vba.NonsupportedDSCInputBPC == false + && !mode_lib->vba.ExceededMALLSize + && ((mode_lib->vba.HostVMEnable == false + && !mode_lib->vba.ImmediateFlipRequiredFinal) + || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) + && (!mode_lib->vba.DRAMClockChangeRequirementFinal + || i == v->soc.num_states - 1 + || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported) + && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1 + || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported) + && (!mode_lib->vba.USRRetrainingRequiredFinal + || mode_lib->vba.USRRetrainingSupport[i][j])) { + mode_lib->vba.ModeSupport[i][j] = true; + } else { + mode_lib->vba.ModeSupport[i][j] = false; + } + } + } +} + void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { - unsigned int dummy_integer[4]; - bool dummy_boolean[2]; - bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; - bool MPCCombineMethodAsPossible; - enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; - unsigned int TotalNumberOfActiveOTG; - unsigned int TotalNumberOfActiveHDMIFRL; - unsigned int TotalNumberOfActiveDP2p0; - unsigned int TotalNumberOfActiveDP2p0Outputs; - unsigned int TotalDSCUnitsRequired; - unsigned int m; - unsigned int ReorderingBytes; - bool FullFrameMALLPStateMethod; - bool SubViewportMALLPStateMethod; - bool PhantomPipeMALLPStateMethod; + struct vba_vars_st *v = &mode_lib->vba; + int i, j; + unsigned int k, m; unsigned int MaximumMPCCombine; + unsigned int 
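/*
 * mode_support_configuration() above is a pure extraction: it folds every
 * per-state support flag into ModeSupport[i][j] with one conjunction,
 * visiting voltage states from highest to lowest. A caller can then take
 * the lowest state/MPC-combine pair that passed, e.g. (illustrative
 * helper, not part of this patch):
 *
 *   // Lowest supported voltage state for either combine setting, or
 *   // num_states when no mode fits.
 *   static int lowest_supported_state(const bool support[][2], int num_states)
 *   {
 *       int i, j;
 *
 *       for (i = 0; i < num_states; i++)
 *           for (j = 0; j < 2; j++)
 *               if (support[i][j])
 *                   return i;
 *       return num_states;
 *   }
 */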
NumberOfNonCombinedSurfaceOfMaximumBandwidth; + unsigned int TotalSlots; bool CompBufReservedSpaceNeedAdjustment; bool CompBufReservedSpaceNeedAdjustmentSingleDPP; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: called\n", __func__); #endif - struct vba_vars_st *v = &mode_lib->vba; - - int i, j; - unsigned int k; /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ @@ -1945,7 +1983,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.Read256BlockHeightC, mode_lib->vba.Read256BlockWidthY, mode_lib->vba.Read256BlockWidthC, - dummy_odm_mode, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_odm_mode, mode_lib->vba.BlendingAndTiming, mode_lib->vba.BytePerPixelY, mode_lib->vba.BytePerPixelC, @@ -1973,35 +2011,26 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.SingleDPPViewportSizeSupportPerSurface,/* bool ViewportSizeSupportPerSurface[] */ &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[1][0]); /* bool *ViewportSizeSupport */ - MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; - MPCCombineMethodAsPossible = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_reduce_voltage_and_clocks) - MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage = true; if (mode_lib->vba.MPCCombineUse[k] == dm_mpc_always_when_possible) - MPCCombineMethodAsPossible = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible = true; } - mode_lib->vba.MPCCombineMethodIncompatible = MPCCombineMethodAsNeededForPStateChangeAndVoltage - && MPCCombineMethodAsPossible; + mode_lib->vba.MPCCombineMethodIncompatible = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsNeededForPStateChangeAndVoltage + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.MPCCombineMethodAsPossible; for (i = 0; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { - bool NoChroma; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = 0; mode_lib->vba.TotalAvailablePipesSupport[i][j] = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC = dm_odm_combine_mode_disabled; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC = dm_odm_combine_mode_disabled; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { - - bool TotalAvailablePipesSupportNoDSC; - unsigned int NumberOfDPPNoDSC; - enum odm_combine_mode ODMModeNoDSC = dm_odm_combine_mode_disabled; - double RequiredDISPCLKPerSurfaceNoDSC; - bool TotalAvailablePipesSupportDSC; - unsigned int NumberOfDPPDSC; - enum odm_combine_mode ODMModeDSC = dm_odm_combine_mode_disabled; - double RequiredDISPCLKPerSurfaceDSC; - dml32_CalculateODMMode( mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, mode_lib->vba.HActive[k], @@ -2018,10 +2047,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DISPCLKDPPCLKVCOSpeed, /* Output */ - &TotalAvailablePipesSupportNoDSC, - &NumberOfDPPNoDSC, - &ODMModeNoDSC, - &RequiredDISPCLKPerSurfaceNoDSC); + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC, + 
&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC); dml32_CalculateODMMode( mode_lib->vba.MaximumPixelsPerLinePerDSCUnit, @@ -2039,10 +2068,10 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DISPCLKDPPCLKVCOSpeed, /* Output */ - &TotalAvailablePipesSupportDSC, - &NumberOfDPPDSC, - &ODMModeDSC, - &RequiredDISPCLKPerSurfaceDSC); + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC); dml32_CalculateOutputLink( mode_lib->vba.PHYCLKPerState[i], @@ -2060,8 +2089,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.AudioSampleRate[k], mode_lib->vba.AudioSampleLayout[k], - ODMModeNoDSC, - ODMModeDSC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC, mode_lib->vba.DSCEnable[k], mode_lib->vba.OutputLinkDPLanes[k], mode_lib->vba.OutputLinkDPRate[k], @@ -2075,21 +2104,21 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.RequiredSlots[i][k]); if (mode_lib->vba.RequiresDSC[i][k] == false) { - mode_lib->vba.ODMCombineEnablePerState[i][k] = ODMModeNoDSC; + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeNoDSC; mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = - RequiredDISPCLKPerSurfaceNoDSC; - if (!TotalAvailablePipesSupportNoDSC) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceNoDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportNoDSC) mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = - mode_lib->vba.TotalNumberOfActiveDPP[i][j] + NumberOfDPPNoDSC; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPNoDSC; } else { - mode_lib->vba.ODMCombineEnablePerState[i][k] = ODMModeDSC; + mode_lib->vba.ODMCombineEnablePerState[i][k] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ODMModeDSC; mode_lib->vba.RequiredDISPCLKPerSurface[i][j][k] = - RequiredDISPCLKPerSurfaceDSC; - if (!TotalAvailablePipesSupportDSC) + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.RequiredDISPCLKPerSurfaceDSC; + if (!v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalAvailablePipesSupportDSC) mode_lib->vba.TotalAvailablePipesSupport[i][j] = false; mode_lib->vba.TotalNumberOfActiveDPP[i][j] = - mode_lib->vba.TotalNumberOfActiveDPP[i][j] + NumberOfDPPDSC; + mode_lib->vba.TotalNumberOfActiveDPP[i][j] + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NumberOfDPPDSC; } } @@ -2124,7 +2153,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] = 0; - NoChroma = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = true; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if 
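/*
 * This pass counts single-DPP surfaces and records whether any carries
 * chroma (the 4:2:0 and RGBE-alpha formats checked below); unbounded
 * requesting is only attempted when no chroma is present. When pipes
 * remain, the subsequent while loop MPC-combines greedily, always picking
 * the not-yet-combined, non-ODM surface with the highest read bandwidth.
 * In sketch form (simplified arrays, illustrative only):
 *
 *   static int hungriest_candidate(const bool *combined, const bool *odm,
 *                                  const double *read_bw, int n)
 *   {
 *       int k, best = -1;
 *       double best_bw = 0.0;
 *
 *       for (k = 0; k < n; k++)
 *           if (!combined[k] && !odm[k] && read_bw[k] > best_bw) {
 *               best = k;
 *               best_bw = read_bw[k];
 *           }
 *       return best; // next surface to split across two DPPs, or -1
 *   }
 */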
(mode_lib->vba.NoOfDPP[i][j][k] == 1) @@ -2134,7 +2163,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l || mode_lib->vba.SourcePixelFormat[k] == dm_420_10 || mode_lib->vba.SourcePixelFormat[k] == dm_420_12 || mode_lib->vba.SourcePixelFormat[k] == dm_rgbe_alpha) { - NoChroma = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma = false; } } @@ -2145,15 +2174,15 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (j == 1 && !dml32_UnboundedRequest(mode_lib->vba.UseUnboundedRequesting, - mode_lib->vba.TotalNumberOfActiveDPP[i][j], NoChroma, + mode_lib->vba.TotalNumberOfActiveDPP[i][j], v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NoChroma, mode_lib->vba.Output[0], mode_lib->vba.SurfaceTiling[0], CompBufReservedSpaceNeedAdjustment, mode_lib->vba.DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)) { while (!(mode_lib->vba.TotalNumberOfActiveDPP[i][j] >= mode_lib->vba.MaxNumDPP || mode_lib->vba.TotalNumberOfSingleDPPSurfaces[i][j] == 0)) { - double BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; - unsigned int NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = 0; + NumberOfNonCombinedSurfaceOfMaximumBandwidth = 0; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.MPCCombineUse[k] @@ -2161,13 +2190,13 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.MPCCombineUse[k] != dm_mpc_reduce_voltage && mode_lib->vba.ReadBandwidthLuma[k] + mode_lib->vba.ReadBandwidthChroma[k] > - BWOfNonCombinedSurfaceOfMaximumBandwidth && + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth && (mode_lib->vba.ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_2to1 && mode_lib->vba.ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) && mode_lib->vba.MPCCombine[i][j][k] == false) { - BWOfNonCombinedSurfaceOfMaximumBandwidth = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.BWOfNonCombinedSurfaceOfMaximumBandwidth = mode_lib->vba.ReadBandwidthLuma[k] + mode_lib->vba.ReadBandwidthChroma[k]; NumberOfNonCombinedSurfaceOfMaximumBandwidth = k; @@ -2233,28 +2262,28 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } // i (VOLTAGE_STATE) /* Total Available OTG, HDMIFRL, DP Support Check */ - TotalNumberOfActiveOTG = 0; - TotalNumberOfActiveHDMIFRL = 0; - TotalNumberOfActiveDP2p0 = 0; - TotalNumberOfActiveDP2p0Outputs = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = 0; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.BlendingAndTiming[k] == k) { - TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG + 1; if (mode_lib->vba.Output[k] == dm_dp2p0) { - TotalNumberOfActiveDP2p0 = TotalNumberOfActiveDP2p0 + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 = 
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 + 1; if (mode_lib->vba.OutputMultistreamId[k] == k || mode_lib->vba.OutputMultistreamEn[k] == false) { - TotalNumberOfActiveDP2p0Outputs = TotalNumberOfActiveDP2p0Outputs + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs + 1; } } } } - mode_lib->vba.NumberOfOTGSupport = (TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG); - mode_lib->vba.NumberOfHDMIFRLSupport = (TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs); - mode_lib->vba.NumberOfDP2p0Support = (TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams - && TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs); + mode_lib->vba.NumberOfOTGSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveOTG <= mode_lib->vba.MaxNumOTG); + mode_lib->vba.NumberOfHDMIFRLSupport = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveHDMIFRL <= mode_lib->vba.MaxNumHDMIFRLOutputs); + mode_lib->vba.NumberOfDP2p0Support = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0 <= mode_lib->vba.MaxNumDP2p0Streams + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalNumberOfActiveDP2p0Outputs <= mode_lib->vba.MaxNumDP2p0Outputs); /* Display IO and DSC Support Check */ mode_lib->vba.NonsupportedDSCInputBPC = false; @@ -2269,8 +2298,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (i = 0; i < v->soc.num_states; ++i) { - unsigned int TotalSlots; - mode_lib->vba.ExceededMultistreamSlots[i] = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.OutputMultistreamEn[k] == true && mode_lib->vba.OutputMultistreamId[k] == k) { @@ -2441,12 +2468,12 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /* Check DSC Unit and Slices Support */ - TotalDSCUnitsRequired = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; for (i = 0; i < v->soc.num_states; ++i) { mode_lib->vba.NotEnoughDSCUnits[i] = false; mode_lib->vba.NotEnoughDSCSlices[i] = false; - TotalDSCUnitsRequired = 0; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 0; mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = true; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.RequiresDSC[i][k] == true) { @@ -2454,33 +2481,31 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.HActive[k] > 4 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 4; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 4; if (mode_lib->vba.NumberOfDSCSlices[k] > 16) mode_lib->vba.NotEnoughDSCSlices[i] = true; } else if (mode_lib->vba.ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { if (mode_lib->vba.HActive[k] > 2 * mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 2; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = 
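/*
 * The DSC budget rules in this loop tie the ODM combine mode to a fixed
 * engine/slice cost: 4-to-1 combine consumes four DSC units and permits
 * at most 16 slices, 2-to-1 consumes two units and 8 slices, and an
 * uncombined stream consumes one unit and 4 slices. Worked example: two
 * 4-to-1 streams need 2 * 4 = 8 DSC engines, so on hardware advertising
 * NumberOfDSC = 4 the state is flagged NotEnoughDSCUnits[i] and rejected.
 */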
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 2; if (mode_lib->vba.NumberOfDSCSlices[k] > 8) mode_lib->vba.NotEnoughDSCSlices[i] = true; } else { if (mode_lib->vba.HActive[k] > mode_lib->vba.MaximumPixelsPerLinePerDSCUnit) mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] = false; - TotalDSCUnitsRequired = TotalDSCUnitsRequired + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired + 1; if (mode_lib->vba.NumberOfDSCSlices[k] > 4) mode_lib->vba.NotEnoughDSCSlices[i] = true; } } } - if (TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) + if (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.TotalDSCUnitsRequired > mode_lib->vba.NumberOfDSC) mode_lib->vba.NotEnoughDSCUnits[i] = true; } /*DSC Delay per state*/ for (i = 0; i < v->soc.num_states; ++i) { - unsigned int m; - for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { mode_lib->vba.DSCDelayPerState[i][k] = dml32_DSCDelayRequirement( mode_lib->vba.RequiresDSC[i][k], mode_lib->vba.ODMCombineEnablePerState[i][k], @@ -2576,8 +2601,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.DETBufferSizeCThisState, &mode_lib->vba.UnboundedRequestEnabledThisState, &mode_lib->vba.CompressedBufferSizeInkByteThisState, - &dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ - &dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], /* Long CompBufReservedSpaceKBytes */ + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean[0], /* bool CompBufReservedSpaceNeedAdjustment */ v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_boolean_array[0], &mode_lib->vba.ViewportSizeSupport[i][j]); @@ -2942,7 +2967,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } - ReorderingBytes = mode_lib->vba.NumberOfChannels + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes = mode_lib->vba.NumberOfChannels * dml_max3(mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelDataOnly, mode_lib->vba.UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, mode_lib->vba.UrgentOutOfOrderReturnPerChannelVMDataOnly); @@ -2998,20 +3023,20 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)); } - FullFrameMALLPStateMethod = false; - SubViewportMALLPStateMethod = false; - PhantomPipeMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = false; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = false; for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) - FullFrameMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod = true; if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) - SubViewportMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod = true; if (mode_lib->vba.UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) - 
PhantomPipeMALLPStateMethod = true; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod = true; } - mode_lib->vba.InvalidCombinationOfMALLUseForPState = (SubViewportMALLPStateMethod - != PhantomPipeMALLPStateMethod) || (SubViewportMALLPStateMethod && FullFrameMALLPStateMethod); + mode_lib->vba.InvalidCombinationOfMALLUseForPState = (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod + != v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.PhantomPipeMALLPStateMethod) || (v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.SubViewportMALLPStateMethod && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.FullFrameMALLPStateMethod); if (mode_lib->vba.UseMinimumRequiredDCFCLK == true) { dml32_UseMinimumDCFCLK( @@ -3025,7 +3050,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.SREnterPlusExitTime, mode_lib->vba.ReturnBusWidth, mode_lib->vba.RoundTripPingLatencyCycles, - ReorderingBytes, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, mode_lib->vba.PixelChunkSizeInKByte, mode_lib->vba.MetaChunkSize, mode_lib->vba.GPUVMEnable, @@ -3088,7 +3113,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l / mode_lib->vba.ReturnBWPerState[i][j] > (mode_lib->vba.RoundTripPingLatencyCycles + 32) / mode_lib->vba.DCFCLKState[i][j] - + ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) { + + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes / mode_lib->vba.ReturnBWPerState[i][j]) { mode_lib->vba.ROBSupport[i][j] = true; } else { mode_lib->vba.ROBSupport[i][j] = false; @@ -3130,9 +3155,6 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l for (i = 0; i < (int) v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { - double VMDataOnlyReturnBWPerState; - double HostVMInefficiencyFactor; - unsigned int NextPrefetchModeState; mode_lib->vba.TimeCalc = 24 / mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; @@ -3172,37 +3194,35 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentBurstFactorChroma, mode_lib->vba.UrgentBurstFactorCursor); - VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]); - HostVMInefficiencyFactor = 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = 1; if (mode_lib->vba.GPUVMEnable && mode_lib->vba.HostVMEnable) - HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j] - / VMDataOnlyReturnBWPerState; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor = mode_lib->vba.ReturnBWPerState[i][j] + / v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState; mode_lib->vba.ExtraLatency = dml32_CalculateExtraLatency( - mode_lib->vba.RoundTripPingLatencyCycles, ReorderingBytes, + mode_lib->vba.RoundTripPingLatencyCycles, v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.ReorderingBytes, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.TotalNumberOfActiveDPP[i][j], mode_lib->vba.PixelChunkSizeInKByte, mode_lib->vba.TotalNumberOfDCCActiveDPP[i][j], mode_lib->vba.MetaChunkSize, mode_lib->vba.ReturnBWPerState[i][j], 
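/*
 * HostVMInefficiencyFactor, computed just above, is the ratio
 *
 *   ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState
 *
 * and stays at 1 unless both GPUVM and HostVM are enabled. Page-table
 * fetches only achieve the VM-data-only return bandwidth, so the prefetch
 * and flip scheduling below scale their VM-related transfer times by this
 * factor (typically >= 1) to budget for the slower traffic.
 */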
mode_lib->vba.GPUVMEnable, mode_lib->vba.HostVMEnable, mode_lib->vba.NumberOfActiveSurfaces, mode_lib->vba.NoOfDPPThisState, mode_lib->vba.dpte_group_bytes, - HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.HostVMMinPageSize, mode_lib->vba.HostVMMaxNonCachedPageTableLevels); - NextPrefetchModeState = mode_lib->vba.MinPrefetchMode; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = mode_lib->vba.MinPrefetchMode; mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; do { - mode_lib->vba.PrefetchModePerState[i][j] = NextPrefetchModeState; + mode_lib->vba.PrefetchModePerState[i][j] = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState; mode_lib->vba.MaxVStartup = mode_lib->vba.NextMaxVStartup; for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { - DmlPipe myPipe; - mode_lib->vba.TWait = dml32_CalculateTWait( mode_lib->vba.PrefetchModePerState[i][j], mode_lib->vba.UsesMALLForPStateChange[k], @@ -3212,34 +3232,34 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.FCLKChangeLatency, mode_lib->vba.UrgLatency[i], mode_lib->vba.SREnterPlusExitTime); - myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k]; - myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j]; - myPipe.PixelClock = mode_lib->vba.PixelClock[k]; - myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; - myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; - myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; - myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; - myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; - myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; - myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; - myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; - myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; - myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; - myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; - myPipe.HTotal = mode_lib->vba.HTotal[k]; - myPipe.HActive = mode_lib->vba.HActive[k]; - myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; - myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k]; - myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; - myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; - myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; - myPipe.ProgressiveToInterlaceUnitInOPP = + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.PixelClock = mode_lib->vba.PixelClock[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCFClkDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[i][j]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DPPPerSurface = mode_lib->vba.NoOfDPP[i][j][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourceRotation = mode_lib->vba.SourceRotation[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesY = mode_lib->vba.Read256BlockWidthY[k]; + 
v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesY = mode_lib->vba.Read256BlockHeightY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockWidth256BytesC = mode_lib->vba.Read256BlockWidthC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BlockHeight256BytesC = mode_lib->vba.Read256BlockHeightC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.InterlaceEnable = mode_lib->vba.Interlace[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HTotal = mode_lib->vba.HTotal[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.HActive = mode_lib->vba.HActive[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.DCCEnable = mode_lib->vba.DCCEnable[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ODMMode = mode_lib->vba.ODMCombineEnablePerState[i][k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.SourcePixelFormat = mode_lib->vba.SourcePixelFormat[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelY = mode_lib->vba.BytePerPixelY[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.BytePerPixelC = mode_lib->vba.BytePerPixelC[k]; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.ProgressiveToInterlaceUnitInOPP = mode_lib->vba.ProgressiveToInterlaceUnitInOPP; mode_lib->vba.NoTimeForPrefetch[i][j][k] = dml32_CalculatePrefetchSchedule( - HostVMInefficiencyFactor, - &myPipe, + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe, mode_lib->vba.DSCDelayPerState[i][k], mode_lib->vba.DPPCLKDelaySubtotal + mode_lib->vba.DPPCLKDelayCNVCFormater, @@ -3298,7 +3318,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[0], // double *Tdmdl_vm &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[1], // double *Tdmdl &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[2], // double *TSetup - &dummy_integer[0], // unsigned int *VUpdateOffsetPix + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // unsigned int *VUpdateOffsetPix &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[3], // unsigned int *VUpdateWidthPix &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single[4]); // unsigned int *VReadyOffsetPix } @@ -3427,7 +3447,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) { - dml32_CalculateFlipSchedule(HostVMInefficiencyFactor, + dml32_CalculateFlipSchedule(v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.HostVMInefficiencyFactor, mode_lib->vba.ExtraLatency, mode_lib->vba.UrgLatency[i], mode_lib->vba.GPUVMMaxPageTableLevels, @@ -3501,7 +3521,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l if (mode_lib->vba.MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || mode_lib->vba.AnyLinesForVMOrRowTooLarge == false) { mode_lib->vba.NextMaxVStartup = mode_lib->vba.MaxMaxVStartup[i][j]; 
- NextPrefetchModeState = NextPrefetchModeState + 1; + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState = v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState + 1; } else { mode_lib->vba.NextMaxVStartup = mode_lib->vba.NextMaxVStartup - 1; } @@ -3515,7 +3535,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l && !mode_lib->vba.ImmediateFlipRequiredFinal) || mode_lib->vba.ImmediateFlipSupportedForState[i][j] == true)) || (mode_lib->vba.NextMaxVStartup == mode_lib->vba.MaxMaxVStartup[i][j] - && NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode))); + && v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.NextPrefetchModeState > mode_lib->vba.MaxPrefetchMode))); for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { mode_lib->vba.use_one_row_for_frame_this_state[k] = @@ -3591,7 +3611,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l &mode_lib->vba.Watermark, // Store the values in vba &mode_lib->vba.DRAMClockChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[0], // double *MaxActiveDRAMClockChangeLatencySupported - &dummy_integer[0], // Long SubViewportLinesNeededInMALL[] + &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_integer[0], // Long SubViewportLinesNeededInMALL[] &mode_lib->vba.FCLKChangeSupport[i][j], &v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.dummy_single2[1], // double *MinActiveFCLKChangeLatencySupported &mode_lib->vba.USRRetrainingSupport[i][j], @@ -3671,68 +3691,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } /*Mode Support, Voltage State and SOC Configuration*/ - for (i = v->soc.num_states - 1; i >= 0; i--) { - for (j = 0; j < 2; j++) { - if (mode_lib->vba.ScaleRatioAndTapsSupport == true - && mode_lib->vba.SourceFormatPixelAndScanSupport == true - && mode_lib->vba.ViewportSizeSupport[i][j] == true - && !mode_lib->vba.LinkRateDoesNotMatchDPVersion - && !mode_lib->vba.LinkRateForMultistreamNotIndicated - && !mode_lib->vba.BPPForMultistreamNotIndicated - && !mode_lib->vba.MultistreamWithHDMIOreDP - && !mode_lib->vba.ExceededMultistreamSlots[i] - && !mode_lib->vba.MSOOrODMSplitWithNonDPLink - && !mode_lib->vba.NotEnoughLanesForMSO - && mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420 - && !mode_lib->vba.DSCOnlyIfNecessaryWithBPP - && !mode_lib->vba.DSC422NativeNotSupported - && !mode_lib->vba.MPCCombineMethodIncompatible - && mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true - && mode_lib->vba.ODMCombine4To1SupportCheckOK[i] == true - && mode_lib->vba.NotEnoughDSCUnits[i] == false - && !mode_lib->vba.NotEnoughDSCSlices[i] - && !mode_lib->vba.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe - && !mode_lib->vba.InvalidCombinationOfMALLUseForPStateAndStaticScreen - && mode_lib->vba.DSCCLKRequiredMoreThanSupported[i] == false - && mode_lib->vba.PixelsPerLinePerDSCUnitSupport[i] - && mode_lib->vba.DTBCLKRequiredMoreThanSupported[i] == false - && !mode_lib->vba.InvalidCombinationOfMALLUseForPState - && !mode_lib->vba.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified - && mode_lib->vba.ROBSupport[i][j] == true - && mode_lib->vba.DISPCLK_DPPCLK_Support[i][j] == true - && mode_lib->vba.TotalAvailablePipesSupport[i][j] == true - && mode_lib->vba.NumberOfOTGSupport == true - && mode_lib->vba.NumberOfHDMIFRLSupport == true - && mode_lib->vba.EnoughWritebackUnits == true - && 
mode_lib->vba.WritebackLatencySupport == true - && mode_lib->vba.WritebackScaleRatioAndTapsSupport == true - && mode_lib->vba.CursorSupport == true && mode_lib->vba.PitchSupport == true - && mode_lib->vba.ViewportExceedsSurface == false - && mode_lib->vba.PrefetchSupported[i][j] == true - && mode_lib->vba.VActiveBandwithSupport[i][j] == true - && mode_lib->vba.DynamicMetadataSupported[i][j] == true - && mode_lib->vba.TotalVerticalActiveBandwidthSupport[i][j] == true - && mode_lib->vba.VRatioInPrefetchSupported[i][j] == true - && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true - && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true - && mode_lib->vba.NonsupportedDSCInputBPC == false - && !mode_lib->vba.ExceededMALLSize - && ((mode_lib->vba.HostVMEnable == false - && !mode_lib->vba.ImmediateFlipRequiredFinal) - || mode_lib->vba.ImmediateFlipSupportedForState[i][j]) - && (!mode_lib->vba.DRAMClockChangeRequirementFinal - || i == v->soc.num_states - 1 - || mode_lib->vba.DRAMClockChangeSupport[i][j] != dm_dram_clock_change_unsupported) - && (!mode_lib->vba.FCLKChangeRequirementFinal || i == v->soc.num_states - 1 - || mode_lib->vba.FCLKChangeSupport[i][j] != dm_fclock_change_unsupported) - && (!mode_lib->vba.USRRetrainingRequiredFinal - || mode_lib->vba.USRRetrainingSupport[i][j])) { - mode_lib->vba.ModeSupport[i][j] = true; - } else { - mode_lib->vba.ModeSupport[i][j] = false; - } - } - } + mode_support_configuration(v, mode_lib); MaximumMPCCombine = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 84b4b00f29cbd..c87091683b5dc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -498,6 +498,13 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; } + if ((int)(dcn3_21_soc.fclk_change_latency_us * 1000) + != dc->bb_overrides.fclk_clock_change_latency_ns + && dc->bb_overrides.fclk_clock_change_latency_ns) { + dcn3_21_soc.fclk_change_latency_us = + dc->bb_overrides.fclk_clock_change_latency_ns / 1000; + } + if ((int)(dcn3_21_soc.dummy_pstate_latency_us * 1000) != dc->bb_overrides.dummy_clock_change_latency_ns && dc->bb_overrides.dummy_clock_change_latency_ns) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index e8b094006d95d..c596187a1e096 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -26,6 +26,16 @@ #include "dc_features.h" #include "display_mode_enums.h" +/** + * DOC: overview + * + * Most of the DML code is automatically generated and tested via a hardware + * description language. Usually, we use the reference _vcs_dpi in the code + * where VCS means "Verilog Compiled Simulator" and DPI stands for "Direct + * Programmer Interface". In other words, those structs can be used to + * interface Verilog with other languages, such as C. + */ + #ifndef __DISPLAY_MODE_STRUCTS_H__ #define __DISPLAY_MODE_STRUCTS_H__ @@ -159,13 +169,20 @@ struct _vcs_dpi_voltage_scaling_st { double dtbclk_mhz; }; +/** + * struct _vcs_dpi_soc_bounding_box_st - SOC definitions + * + * This struct maintains the SOC Bounding Box information for the ASIC; it + * defines things such as clock, voltage, performance, etc.
Usually, we load + these values from VBIOS; if something goes wrong, we use some hard-coded + values, which will enable the ASIC to light up with limitations. + */ struct _vcs_dpi_soc_bounding_box_st { struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; - /* - * This is a temporary stash for updating @clock_limits with the PMFW - * clock table. Do not use outside of *update_bw_boudning_box functions. + /** + * @num_states: It represents the total number of Display Power Management + * (DPM) states supported by the specific ASIC. */ - struct _vcs_dpi_voltage_scaling_st _clock_tmp[DC__VOLTAGE_STATES]; unsigned int num_states; double sr_exit_time_us; double sr_enter_plus_exit_time_us; @@ -231,6 +248,14 @@ struct _vcs_dpi_soc_bounding_box_st { enum self_refresh_affinity allow_dram_self_refresh_or_dram_clock_change_in_vblank; }; +/** + * struct _vcs_dpi_ip_params_st - IP configuration for DCN blocks + * + * In this struct you can find the DCN configuration associated with the specific + * ASIC. For example, here we can save how many DPPs the ASIC provides and how + * many of them are available. + * + */ struct _vcs_dpi_ip_params_st { bool use_min_dcfclk; bool clamp_min_dcfclk; @@ -283,6 +308,9 @@ struct _vcs_dpi_ip_params_st { unsigned int writeback_line_buffer_chroma_buffer_size; unsigned int max_page_table_levels; + /** + * @max_num_dpp: Maximum number of DPPs supported in the target ASIC. + */ unsigned int max_num_dpp; unsigned int max_num_otg; unsigned int cursor_chunk_size; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 6e61b53823619..492aec634b685 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -196,6 +196,13 @@ struct DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCal unsigned int ReorderBytes; unsigned int VMDataOnlyReturnBW; double HostVMInefficiencyFactor; + DmlPipe myPipe; + SOCParametersList mmSOCParameters; + double dummy_unit_vector[DC__NUM_DPP__MAX]; + double dummy_single[2]; + enum clock_change_support dummy_dramchange_support; + enum dm_fclock_change_support dummy_fclkchange_support; + bool dummy_USRRetrainingSupport; }; struct dml32_ModeSupportAndSystemConfigurationFull { @@ -211,6 +218,35 @@ struct dml32_ModeSupportAndSystemConfigurationFull { double DSTXAfterScaler[DC__NUM_DPP__MAX]; double MaxTotalVActiveRDBandwidth; bool dummy_boolean_array[2][DC__NUM_DPP__MAX]; + enum odm_combine_mode dummy_odm_mode[DC__NUM_DPP__MAX]; + DmlPipe myPipe; + unsigned int dummy_integer[4]; + unsigned int TotalNumberOfActiveOTG; + unsigned int TotalNumberOfActiveHDMIFRL; + unsigned int TotalNumberOfActiveDP2p0; + unsigned int TotalNumberOfActiveDP2p0Outputs; + unsigned int TotalDSCUnitsRequired; + unsigned int ReorderingBytes; + unsigned int TotalSlots; + unsigned int NumberOfDPPDSC; + unsigned int NumberOfDPPNoDSC; + unsigned int NextPrefetchModeState; + bool MPCCombineMethodAsNeededForPStateChangeAndVoltage; + bool MPCCombineMethodAsPossible; + bool FullFrameMALLPStateMethod; + bool SubViewportMALLPStateMethod; + bool PhantomPipeMALLPStateMethod; + bool NoChroma; + bool TotalAvailablePipesSupportNoDSC; + bool TotalAvailablePipesSupportDSC; + enum odm_combine_mode ODMModeNoDSC; + enum odm_combine_mode ODMModeDSC; + double RequiredDISPCLKPerSurfaceNoDSC; + double RequiredDISPCLKPerSurfaceDSC; + double BWOfNonCombinedSurfaceOfMaximumBandwidth; + double VMDataOnlyReturnBWPerState; + double HostVMInefficiencyFactor; + bool 
dummy_boolean[2]; }; struct dummy_vars { diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 4cdcb9751a9cb..598e36108dc8b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -402,6 +402,10 @@ struct pipe_ctx { struct dc_stream_state *stream; struct plane_resource plane_res; + + /** + * @stream_res: Reference to DCN resource components such as OPP and DSC. + */ struct stream_resource stream_res; struct link_resource link_res; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 5097037e39625..8d86159d9de09 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -22,6 +22,16 @@ * */ +/** + * DOC: mpc-overview + * + * Multiple Pipe/Plane Combined (MPC) is a component in the hardware pipeline + * that performs blending of multiple planes, using global and per-pixel alpha. + * It also performs post-blending color correction operations according to the + * hardware capabilities, such as color transformation matrix and gamma 1D and + * 3D LUT. + */ + #ifndef __DC_MPCC_H__ #define __DC_MPCC_H__ @@ -48,14 +58,39 @@ enum mpcc_blend_mode { MPCC_BLEND_MODE_TOP_BOT_BLENDING }; +/** + * enum mpcc_alpha_blend_mode - define the alpha blend mode with respect to + * pixel alpha and plane alpha values + */ enum mpcc_alpha_blend_mode { + /** + * @MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA: per pixel alpha using DPP + * alpha value + */ MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA, + /** + * @MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN: per + * pixel alpha using DPP alpha value multiplied by a global gain (plane + * alpha) + */ MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN, + /** + * @MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA: global alpha value, ignores + * pixel alpha and considers only plane alpha + */ MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA }; -/* - * MPCC blending configuration +/** + * struct mpcc_blnd_cfg - MPCC blending configuration + * + * @black_color: background color + * @alpha_mode: alpha blend mode (MPCC_ALPHA_BLND_MODE) + * @pre_multiplied_alpha: whether pixel color values were pre-multiplied by the + * alpha channel (MPCC_ALPHA_MULTIPLIED_MODE) + * @global_gain: used when the blend mode considers both pixel alpha and plane + * alpha values and takes the global alpha (plane alpha) value. + * @global_alpha: plane alpha value */ struct mpcc_blnd_cfg { struct tg_color black_color; /* background color */ @@ -107,8 +142,15 @@ struct mpc_dwb_flow_control { int flow_ctrl_cnt1; }; -/* - * MPCC connection and blending configuration for a single MPCC instance. +/** + * struct mpcc - MPCC connection and blending configuration for a single MPCC instance. + * @mpcc_id: MPCC physical instance + * @dpp_id: DPP input to this MPCC + * @mpcc_bot: pointer to bottom layer MPCC. NULL when not connected. + * @blnd_cfg: the blending configuration for this MPCC + * @sm_cfg: stereo mix setting for this MPCC + * @shared_bottom: true if the MPCC outputs to both OPP and DWB endpoints; otherwise, false. + * * This struct is used as a node in an MPC tree. */ struct mpcc { @@ -120,8 +162,12 @@ struct mpcc { bool shared_bottom; /* TRUE if MPCC output to both OPP and DWB endpoints, else FALSE */ }; -/* - * MPC tree represents all MPCC connections for a pipe. +/** + * struct mpc_tree - MPC tree represents all MPCC connections for a pipe.
+ * + * @opp_id: the OPP instance that owns this MPC tree + * @opp_list: the top MPCC layer of the MPC tree that outputs to OPP endpoint + * */ struct mpc_tree { int opp_id; /* The OPP instance that owns this MPC tree */ @@ -149,13 +195,18 @@ struct mpcc_state { uint32_t busy; }; +/** + * struct mpc_funcs - MPC hardware control functions + */ struct mpc_funcs { void (*read_mpcc_state)( struct mpc *mpc, int mpcc_inst, struct mpcc_state *s); - /* + /** + * @insert_plane: + * * Insert DPP into MPC tree based on specified blending position. * Only used for planes that are part of blending chain for OPP output * @@ -180,7 +231,9 @@ struct mpc_funcs { int dpp_id, int mpcc_id); - /* + /** + * @remove_mpcc: + * * Remove a specified MPCC from the MPC tree. * * Parameters: @@ -195,7 +248,9 @@ struct mpc_funcs { struct mpc_tree *tree, struct mpcc *mpcc); - /* + /** + * @mpc_init: + * * Reset the MPCC HW status by disconnecting all muxes. * * Parameters: @@ -208,7 +263,9 @@ struct mpc_funcs { struct mpc *mpc, unsigned int mpcc_id); - /* + /** + * @update_blending: + * * Update the blending configuration for a specified MPCC. * * Parameters: @@ -223,7 +280,9 @@ struct mpc_funcs { struct mpcc_blnd_cfg *blnd_cfg, int mpcc_id); - /* + /** + * @cursor_lock: + * * Lock cursor updates for the specified OPP. * OPP defines the set of MPCC that are locked together for cursor. * @@ -239,8 +298,10 @@ struct mpc_funcs { int opp_id, bool lock); - /* - * Add DPP into 'secondary' MPC tree based on specified blending position. + /** + * @insert_plane_to_secondary: + * + * Add DPP into secondary MPC tree based on specified blending position. * Only used for planes that are part of blending chain for DWB output * * Parameters: @@ -264,7 +325,9 @@ struct mpc_funcs { int dpp_id, int mpcc_id); - /* + /** + * @remove_mpcc_from_secondary: + * * Remove a specified DPP from the 'secondary' MPC tree. * * Parameters: diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 7f97718e68493..7e43b4403b3e3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -139,7 +139,13 @@ struct crc_params { bool enable; }; +/** + * struct timing_generator - Entry point to Output Timing Generator feature. + */ struct timing_generator { + /** + * @funcs: Timing generator control functions + */ const struct timing_generator_funcs *funcs; struct dc_bios *bp; struct dc_context *ctx; @@ -150,7 +156,9 @@ struct dc_crtc_timing; struct drr_params; - +/** + * struct timing_generator_funcs - Control timing generator on a given device. + */ struct timing_generator_funcs { bool (*validate_timing)(struct timing_generator *tg, const struct dc_crtc_timing *timing); @@ -275,8 +283,8 @@ struct timing_generator_funcs { const struct crc_params *params); /** - * Get CRCs for the given timing generator. Return false if CRCs are - * not enabled (via configure_crc). + * @get_crc: Get CRCs for the given timing generator. Return false if + * CRCs are not enabled (via configure_crc).
*/ bool (*get_crc)(struct timing_generator *tg, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index ab06c7fc74524..e054f3494087f 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -244,7 +244,7 @@ enum { #define ASICREV_IS_GC_10_3_7(eChipRev) ((eChipRev >= GC_10_3_7_A0) && (eChipRev < GC_10_3_7_UNKNOWN)) #define AMDGPU_FAMILY_GC_11_0_0 145 -#define AMDGPU_FAMILY_GC_11_0_2 148 +#define AMDGPU_FAMILY_GC_11_0_1 148 #define GC_11_0_0_A0 0x1 #define GC_11_0_2_A0 0x10 #define GC_11_UNKNOWN 0xFF diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index da09ba7589f73..0f39ab9dc5b41 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -613,10 +613,6 @@ static void build_vrr_infopacket_data_v1(const struct mod_vrr_params *vrr, * Note: We should never go above the field rate of the mode timing set. */ infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000); - - /* FreeSync HDR */ - infopacket->sb[9] = 0; - infopacket->sb[10] = 0; } static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr, @@ -684,10 +680,6 @@ static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr, /* PB16 : Reserved bits 7:1, FixedRate bit 0 */ infopacket->sb[16] = (vrr->state == VRR_STATE_ACTIVE_FIXED) ? 1 : 0; - - //FreeSync HDR - infopacket->sb[9] = 0; - infopacket->sb[10] = 0; } static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf, @@ -772,8 +764,7 @@ static void build_vrr_infopacket_header_v2(enum signal_type signal, /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length = 0x09] */ infopacket->hb2 = 0x09; - *payload_size = 0x0A; - + *payload_size = 0x09; } else if (dc_is_dp_signal(signal)) { /* HEADER */ @@ -822,9 +813,9 @@ static void build_vrr_infopacket_header_v3(enum signal_type signal, infopacket->hb1 = version; /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length] */ - *payload_size = 0x10; - infopacket->hb2 = *payload_size - 1; //-1 for checksum + infopacket->hb2 = 0x10; + *payload_size = 0x10; } else if (dc_is_dp_signal(signal)) { /* HEADER */ diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h index 6d79630c2d16f..b7c1445b0bed7 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h @@ -3131,6 +3131,8 @@ #define mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32_BASE_IDX 0 #define mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32 0x15cc #define mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32_BASE_IDX 0 +#define mmGCVM_DEBUG 0x15cd +#define mmGCVM_DEBUG_BASE_IDX 0 #define mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32 0x15ce #define mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32_BASE_IDX 0 #define mmGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32 0x15cf @@ -3153,6 +3155,8 @@ #define mmGCVM_L2_BANK_SELECT_RESERVED_CID2_BASE_IDX 0 #define mmGCVM_L2_CACHE_PARITY_CNTL 0x15d8 #define mmGCVM_L2_CACHE_PARITY_CNTL_BASE_IDX 0 +#define mmGCVM_L2_IH_LOG_CNTL 0x15d9 +#define mmGCVM_L2_IH_LOG_CNTL_BASE_IDX 0 #define mmGCVM_L2_CNTL5 0x15dc #define mmGCVM_L2_CNTL5_BASE_IDX 0 #define mmGCVM_L2_GCR_CNTL 0x15dd diff --git 
a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h index 76f695a1d0658..ae2d337158f3b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h @@ -27,7 +27,7 @@ // *** IMPORTANT *** // SMU TEAM: Always increment the interface version if // any structure is changed in this file -#define PMFW_DRIVER_IF_VERSION 4 +#define PMFW_DRIVER_IF_VERSION 5 typedef struct { int32_t value; @@ -197,6 +197,8 @@ typedef struct { uint16_t SkinTemp; uint16_t DeviceState; + uint16_t CurTemp; //[centi-Celsius] + uint16_t spare2; } SmuMetrics_t; typedef struct { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h index d2e10a724560f..82cf9e563065f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_0_7_ppsmc.h @@ -137,7 +137,7 @@ #define PPSMC_MSG_DisallowGpo 0x56 #define PPSMC_MSG_Enable2ndUSB20Port 0x57 - -#define PPSMC_Message_Count 0x58 +#define PPSMC_MSG_DriverMode2Reset 0x5D +#define PPSMC_Message_Count 0x5E #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 19084a4fcb2bd..28f6a1eb6945c 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -235,7 +235,8 @@ __SMU_DUMMY_MAP(UnforceGfxVid), \ __SMU_DUMMY_MAP(HeavySBR), \ __SMU_DUMMY_MAP(SetBadHBMPagesRetiredFlagsPerChannel), \ - __SMU_DUMMY_MAP(EnableGfxImu), + __SMU_DUMMY_MAP(EnableGfxImu), \ + __SMU_DUMMY_MAP(DriverMode2Reset), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 84e61a9967acf..2765ebf8720a8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -154,6 +154,7 @@ static struct cmn2asic_msg_mapping sienna_cichlid_message_map[SMU_MSG_MAX_COUNT] MSG_MAP(SetGpoFeaturePMask, PPSMC_MSG_SetGpoFeaturePMask, 0), MSG_MAP(DisallowGpo, PPSMC_MSG_DisallowGpo, 0), MSG_MAP(Enable2ndUSB20Port, PPSMC_MSG_Enable2ndUSB20Port, 0), + MSG_MAP(DriverMode2Reset, PPSMC_MSG_DriverMode2Reset, 0), }; static struct cmn2asic_mapping sienna_cichlid_clk_map[SMU_CLK_COUNT] = { @@ -4254,6 +4255,57 @@ static int sienna_cichlid_stb_get_data_direct(struct smu_context *smu, return 0; } +static bool sienna_cichlid_is_mode2_reset_supported(struct smu_context *smu) +{ + return true; +} + +static int sienna_cichlid_mode2_reset(struct smu_context *smu) +{ + u32 smu_version; + int ret = 0, index; + struct amdgpu_device *adev = smu->adev; + int timeout = 100; + + smu_cmn_get_smc_version(smu, NULL, &smu_version); + + index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG, + SMU_MSG_DriverMode2Reset); + + mutex_lock(&smu->message_lock); + + ret = smu_cmn_send_msg_without_waiting(smu, (uint16_t)index, + SMU_RESET_MODE_2); + + ret = smu_cmn_wait_for_response(smu); + while (ret != 0 && timeout) { + ret = smu_cmn_wait_for_response(smu); + /* Wait a bit more time for getting ACK */ + if (ret != 0) { + --timeout; + usleep_range(500, 1000); + continue; + } else { + break; + } + } + + if (!timeout) { + dev_err(adev->dev, + "failed to send mode2 message \tparam: 0x%08x response %#x\n", + 
SMU_RESET_MODE_2, ret); + goto out; + } + + dev_info(smu->adev->dev, "restore config space...\n"); + /* Restore the config space saved during init */ + amdgpu_device_load_pci_state(adev->pdev); +out: + mutex_unlock(&smu->message_lock); + + return ret; +} + static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .get_allowed_feature_mask = sienna_cichlid_get_allowed_feature_mask, .set_default_dpm_table = sienna_cichlid_set_default_dpm_table, @@ -4283,6 +4335,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .dump_pptable = sienna_cichlid_dump_pptable, .init_microcode = smu_v11_0_init_microcode, .load_microcode = smu_v11_0_load_microcode, + .fini_microcode = smu_v11_0_fini_microcode, .init_smc_tables = sienna_cichlid_init_smc_tables, .fini_smc_tables = smu_v11_0_fini_smc_tables, .init_power = smu_v11_0_init_power, @@ -4348,6 +4401,8 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .get_default_config_table_settings = sienna_cichlid_get_default_config_table_settings, .set_config_table = sienna_cichlid_set_config_table, .get_unique_id = sienna_cichlid_get_unique_id, + .mode2_reset_is_support = sienna_cichlid_is_mode2_reset_supported, + .mode2_reset = sienna_cichlid_mode2_reset, }; void sienna_cichlid_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 82d3718d83244..97e1d55dcaad5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -71,7 +71,6 @@ static struct cmn2asic_msg_mapping smu_v13_0_4_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetPmfwVersion, 1), MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1), - MSG_MAP(EnableGfxOff, PPSMC_MSG_EnableGfxOff, 1), MSG_MAP(AllowGfxOff, PPSMC_MSG_AllowGfxOff, 1), MSG_MAP(DisallowGfxOff, PPSMC_MSG_DisallowGfxOff, 1), MSG_MAP(PowerDownVcn, PPSMC_MSG_PowerDownVcn, 1), @@ -199,6 +198,9 @@ static int smu_v13_0_4_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } @@ -226,18 +228,6 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en) return ret; } -static int smu_v13_0_4_post_smu_init(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - int ret = 0; - - /* allow message will be sent after enable message */ - ret = smu_cmn_send_smc_msg(smu, SMU_MSG_EnableGfxOff, NULL); - if (ret) - dev_err(adev->dev, "Failed to Enable GfxOff!\n"); - return ret; -} - static ssize_t smu_v13_0_4_get_gpu_metrics(struct smu_context *smu, void **table) { @@ -1026,7 +1016,6 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = { .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, .set_driver_table_location = smu_v13_0_set_driver_table_location, .gfx_off_control = smu_v13_0_gfx_off_control, - .post_init = smu_v13_0_4_post_smu_init, .mode2_reset = smu_v13_0_4_mode2_reset, .get_dpm_ultimate_freq = smu_v13_0_4_get_dpm_ultimate_freq, .od_edit_dpm_table = smu_v13_0_od_edit_dpm_table, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c index 47360ef5c1758..66445964efbd1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c @@ -176,6 +176,9 
@@ static int smu_v13_0_5_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index c368db1b834e6..d247f1e8550a2 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -75,8 +75,10 @@ * 10.2 - Reserved * 10.3 - Pass context_save_restore_area size to user-space * 11.0 - Restrict HW mode ops access to trap activate + * 12.0 - Let the debugger specify entry sizes for snapshots + * 13.0 - Change address watch to set/clear per target device */ -#define KFD_IOCTL_DBG_MAJOR_VERSION 11 +#define KFD_IOCTL_DBG_MAJOR_VERSION 13 #define KFD_IOCTL_DBG_MINOR_VERSION 0 struct kfd_ioctl_get_version_args { @@ -477,7 +479,7 @@ struct kfd_runtime_info { * exception_mask: exception to clear on snapshot * ptr: user buffer (IN) * data1: number of queue snapshots (IN/OUT) - 0 for IN ignores buffer writes - * data2: unused + * data2: buffer entry size in bytes (IN/OUT) * data3: unused * data4: unused */ @@ -497,11 +499,11 @@ struct kfd_runtime_info { * exception_mask: unused * ptr: unused * data1: watch ID - * data2: unused + * data2: device ID * data3: unused * data4: unused */ -#define KFD_IOC_DBG_TRAP_CLEAR_ADDRESS_WATCH 8 +#define KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH 8 /* KFD_IOC_DBG_TRAP_SET_ADDRESS_WATCH: * exception_mask: unused @@ -509,9 +511,9 @@ struct kfd_runtime_info { * data1: Watch ID (OUT) * data2: watch_mode: 0=read, 1=nonread, 2=atomic, 3=all * data3: watch address mask - * data4: unused + * data4: device ID */ -#define KFD_IOC_DBG_TRAP_SET_ADDRESS_WATCH 9 +#define KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH 9 /* KFD_IOC_DBG_TRAP_SET_PRECISE_MEM_OPS * exception_mask: unused @@ -540,7 +542,7 @@ struct kfd_runtime_info { * exception_mask: exception to clear on snapshot * ptr: user buffer for 'struct kfd_dbg_device_info_entry' entries (IN) * data1: number of devices in snapshot (IN/OUT) - * data2: unused + * data2: buffer entry size in bytes (IN/OUT) * data3: unused * data4: unused */
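As a usage sketch of the per-device (v13) address-watch semantics documented in the kfd_ioctl.h hunk above: the field names below mirror the documented data1..data4 fields, but the argument struct, the helper name, and how the ioctl is issued are illustrative assumptions, not definitions taken from this patch::

    /* Hypothetical userspace sketch for the v13 per-device address watch.
     * struct dbg_trap_args is an illustrative stand-in for the real ioctl
     * argument struct, which this patch does not show.
     */
    #include <stdint.h>

    struct dbg_trap_args {                  /* illustrative only */
            uint64_t exception_mask;
            uint64_t ptr;
            uint32_t data1, data2, data3, data4;
    };

    /* Fill the args for KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH; after the
     * ioctl completes, data1 carries the watch ID (OUT).
     */
    void prepare_set_node_address_watch(struct dbg_trap_args *args,
                                        uint32_t device_id,
                                        uint32_t watch_mode, /* 0=read, 1=nonread, 2=atomic, 3=all */
                                        uint32_t address_mask)
    {
            args->data2 = watch_mode;   /* watch_mode (IN) */
            args->data3 = address_mask; /* watch address mask (IN) */
            args->data4 = device_id;    /* new in v13: target device ID (IN) */
    }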