diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst
index 8aeb0186c9ef8..5e27e4eb39596 100644
--- a/Documentation/gpu/amdgpu/thermal.rst
+++ b/Documentation/gpu/amdgpu/thermal.rst
@@ -63,3 +63,60 @@ gpu_metrics
 .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
    :doc: gpu_metrics
+
+GFXOFF
+======
+
+GFXOFF is a feature found in most recent GPUs that saves power at runtime. The
+card's RLC (RunList Controller) firmware powers off the gfx engine
+dynamically when there is no workload on gfx or compute pipes. GFXOFF is on by
+default on supported GPUs.
+
+Userspace can interact with GFXOFF through a debugfs interface (all values are
+`uint32_t`, unless otherwise noted):
+
+``amdgpu_gfxoff``
+-----------------
+
+Use it to enable/disable GFXOFF, and to check whether it is currently
+enabled/disabled::
+
+  $ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff
+  01
+
+- Write 0 to disable it, and 1 to enable it.
+- Reading 0 means it is disabled, 1 means it is enabled.
+
+If it is enabled, the GPU is free to enter GFXOFF mode as needed. Disabled
+means that it will never enter GFXOFF mode.
+
+``amdgpu_gfxoff_status``
+------------------------
+
+Read it to check the current GFXOFF status of a GPU::
+
+  $ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff_status
+  02
+
+- 0: GPU is in GFXOFF state, the gfx engine is powered down.
+- 1: Transition out of GFXOFF state
+- 2: Not in GFXOFF state
+- 3: Transition into GFXOFF state
+
+If GFXOFF is enabled, the value transitions between 0 and 3, settling at 0
+whenever possible. When it is disabled, it is always at 2. Returns ``-EINVAL``
+if it is not supported.
+
+``amdgpu_gfxoff_count``
+-----------------------
+
+Read it to get the total GFXOFF entry count at the time of query since system
+power-up. The value is a `uint64_t`; however, due to firmware limitations, it
+can currently overflow as a `uint32_t`. *Only supported in vangogh*
+
+``amdgpu_gfxoff_residency``
+---------------------------
+
+Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop. Read it to
+get the average GFXOFF residency percentage multiplied by 100 for the last
+logging interval. E.g. a value of 7854 means the GPU was in GFXOFF mode 78.54%
+of the time during the last logging interval.
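As an illustration of the residency workflow above, here is a minimal userspace
sketch (not part of this patch): it starts logging, waits, stops logging, and
reads back the frozen value. It assumes card index 0 (``dri/0``), debugfs
mounted at the default location, root privileges, and a device that actually
exposes the file; the ``RESIDENCY_PATH`` macro, the 10-second interval, and the
error handling are illustrative only::

  /* Hypothetical sketch of the amdgpu_gfxoff_residency debugfs usage. */
  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <unistd.h>

  #define RESIDENCY_PATH "/sys/kernel/debug/dri/0/amdgpu_gfxoff_residency"

  static int write_u32(int fd, uint32_t v)
  {
          /* The handler expects 4-byte aligned, 4-byte sized accesses. */
          return pwrite(fd, &v, sizeof(v), 0) == sizeof(v) ? 0 : -1;
  }

  int main(void)
  {
          uint32_t residency = 0;
          int fd = open(RESIDENCY_PATH, O_RDWR);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }

          if (write_u32(fd, 1))           /* start logging */
                  perror("start logging");
          sleep(10);                      /* let the GPU idle for a while */
          if (write_u32(fd, 0))           /* stop logging; value is now frozen */
                  perror("stop logging");

          if (pread(fd, &residency, sizeof(residency), 0) != sizeof(residency)) {
                  perror("read");
                  close(fd);
                  return 1;
          }

          /* Value is percent multiplied by 100, e.g. 7854 -> 78.54%. */
          printf("GFXOFF residency: %u.%02u%%\n", residency / 100, residency % 100);
          close(fd);
          return 0;
  }

Note that these debugfs files return raw native-endian integers rather than
ASCII text, which is why the examples above read them with ``xxd``.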
*Only supported in vangogh* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 1372e2b475418..89d93833f49b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -321,7 +321,7 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; - +#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 540178d166929..4c659178ed4b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -164,11 +164,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, enum TLB_FLUSH_TYPE flush_type); -int amdgpu_amdkfd_copy_mem_to_mem(struct amdgpu_device *adev, struct kgd_mem *src_mem, - uint64_t src_offset, struct kgd_mem *dst_mem, - uint64_t dest_offset, uint64_t size, struct dma_fence **f, - uint64_t *actual_size); - bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev); @@ -292,7 +287,7 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, - void *drm_priv, struct sg_table *sg, struct kgd_mem **mem, + void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags, bool criu_resume); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7f02641d3469e..0151a50124d0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1668,11 +1668,12 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, - void *drm_priv, struct sg_table *sg, struct kgd_mem **mem, + void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags, bool criu_resume) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); enum ttm_bo_type bo_type = ttm_bo_type_device; + struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; struct drm_gem_object *gobj = NULL; @@ -1713,10 +1714,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } } - if (sg) { - alloc_domain = AMDGPU_GEM_DOMAIN_CPU; - bo_type = ttm_bo_type_sg; - } *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); if (!*mem) { ret = -ENOMEM; @@ -3250,83 +3247,6 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) return 0; } -int amdgpu_amdkfd_copy_mem_to_mem(struct amdgpu_device *adev, struct kgd_mem *src_mem, - uint64_t src_offset, struct kgd_mem *dst_mem, - uint64_t dst_offset, uint64_t size, - struct dma_fence **f, uint64_t *actual_size) -{ - struct amdgpu_copy_mem src, dst; - struct ww_acquire_ctx ticket; - struct list_head list, duplicates; - struct ttm_validate_buffer resv_list[2]; - struct dma_fence *fence = NULL; - int i, r; - - if (!adev|| !src_mem || !dst_mem || !actual_size) - return -EINVAL; - - *actual_size = 0; - - 
INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&duplicates); - - src.bo = &src_mem->bo->tbo; - dst.bo = &dst_mem->bo->tbo; - src.mem = src.bo->resource; - dst.mem = dst.bo->resource; - src.offset = src_offset; - dst.offset = dst_offset; - - resv_list[0].bo = src.bo; - resv_list[1].bo = dst.bo; - - for (i = 0; i < 2; i++) { - resv_list[i].num_shared = 1; - list_add_tail(&resv_list[i].head, &list); - } - - r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); - if (r) { - pr_err("Copy buffer failed. Unable to reserve bo (%d)\n", r); - return r; - } - - /* The process to which the Source and Dest BOs belong to could be - * evicted and the BOs invalidated. So validate BOs before use - */ - r = amdgpu_amdkfd_bo_validate(src_mem->bo, src_mem->domain, false); - if (r) { - pr_err("CMA fail: SRC BO validate failed %d\n", r); - goto validate_fail; - } - - - r = amdgpu_amdkfd_bo_validate(dst_mem->bo, dst_mem->domain, false); - if (r) { - pr_err("CMA fail: DST BO validate failed %d\n", r); - goto validate_fail; - } - - - r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size, false, NULL, - &fence); - if (r) - pr_err("Copy buffer failed %d\n", r); - else - *actual_size = size; - if (fence) { - amdgpu_bo_fence(src_mem->bo, fence, true); - amdgpu_bo_fence(dst_mem->bo, fence, true); - } - if (f) - *f = dma_fence_get(fence); - dma_fence_put(fence); - -validate_fail: - ttm_eu_backoff_reservation(&ticket, &list); - return r; -} - /* Returns GPU-specific tiling mode information */ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6ba5e8ac5e893..0db1aa66a4437 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -957,16 +957,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) { - mutex_unlock(&p->bo_list->bo_list_mutex); + if (r) return r; - } r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) { - mutex_unlock(&p->bo_list->bo_list_mutex); + if (r) return r; - } } r = amdgpu_vm_handle_moved(adev, vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 27670770b384e..dd78fb44cc135 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1061,6 +1061,157 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, return r; } +/** + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * Read the last residency value logged. It doesn't auto update, one needs to + * stop logging before getting the current value. 
+ */ +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + uint32_t value; + + r = amdgpu_get_gfx_off_residency(adev, &value); + if (r) + goto out; + + r = put_user(value, (uint32_t *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +/** + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop + */ +static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u32 value; + + r = get_user(value, (uint32_t *)buf); + if (r) + goto out; + + amdgpu_set_gfx_off_residency(adev, value ? true : false); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + + +/** + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + */ +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u64 value = 0; + + r = amdgpu_get_gfx_off_entrycount(adev, &value); + if (r) + goto out; + + r = put_user(value, (u64 *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + /** * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF * @@ -1268,6 +1419,19 @@ static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_count_read, + .llseek = default_llseek +}; + +static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_residency_read, + .write = amdgpu_debugfs_gfxoff_residency_write, + .llseek = default_llseek +}; + static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, @@ -1280,6 +1444,8 @@ static const 
struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_gpr_fops, &amdgpu_debugfs_gfxoff_fops, &amdgpu_debugfs_gfxoff_status_fops, + &amdgpu_debugfs_gfxoff_count_fops, + &amdgpu_debugfs_gfxoff_residency_fops, }; static const char *debugfs_regs_names[] = { @@ -1294,6 +1460,8 @@ static const char *debugfs_regs_names[] = { "amdgpu_gpr", "amdgpu_gfxoff", "amdgpu_gfxoff_status", + "amdgpu_gfxoff_count", + "amdgpu_gfxoff_residency", }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8f92d207f2b62..69b0a8b089f59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3594,6 +3594,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); adev->gfx.gfx_off_req_count = 1; + adev->gfx.gfx_off_residency = 0; + adev->gfx.gfx_off_entrycount = 0; adev->pm.ac_power = power_supply_is_system_supplied() > 0; atomic_set(&adev->throttling_logging_enabled, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 454a78ba60d43..ceb91469958aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) mutex_unlock(&adev->gfx.gfx_off_mutex); } +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_set_residency_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_get_residency_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_get_entrycount_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 1581067698cab..027e993ff45ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -343,10 +343,12 @@ struct amdgpu_gfx { uint32_t srbm_soft_reset; /* gfx off */ - bool gfx_off_state; /* true: enabled, false: disabled */ - struct mutex gfx_off_mutex; - uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */ - struct delayed_work gfx_off_delay_work; + bool gfx_off_state; /* true: enabled, false: disabled */ + struct mutex gfx_off_mutex; /* mutex to change gfxoff state */ + uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */ + struct delayed_work gfx_off_delay_work; /* async work to set gfx block off */ + uint32_t gfx_off_residency; /* last logged residency */ + uint64_t gfx_off_entrycount; /* count of times GPU has get into GFXOFF state */ /* pipe reservation */ struct mutex pipe_reserve_mutex; @@ -418,6 +420,10 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value); int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block); +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value); +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency); +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value); int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 723721bdd6bf9..1d65e638f376f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -279,10 +279,6 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) /* Signal all jobs not yet scheduled */ for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { struct drm_sched_rq *rq = &sched->sched_rq[i]; - - if (!rq) - continue; - spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c index 33a8a7365aef9..f0e235f98afb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c @@ -28,13 +28,44 @@ #include "navi10_enum.h" #include "soc15_common.h" +#define regATHUB_MISC_CNTL_V3_0_1 0x00d7 +#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0 + + +static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) +{ + uint32_t data; + + switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1); + break; + default: + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + break; + } + return data; +} + +static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) +{ + switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data); + break; + default: + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + break; + } +} + static void athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { uint32_t def, data; - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + def = data = athub_v3_0_get_cg_cntl(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG)) data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; @@ -42,7 +73,7 @@ athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; if (def != data) - WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + athub_v3_0_set_cg_cntl(adev, data); } static void @@ -51,7 +82,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, { uint32_t def, data; - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + def = data = athub_v3_0_get_cg_cntl(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS)) data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; @@ -59,7 +90,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; if (def != data) - WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + athub_v3_0_set_cg_cntl(adev, data); } int athub_v3_0_set_clockgating(struct amdgpu_device *adev, @@ -70,6 +101,7 @@ int athub_v3_0_set_clockgating(struct amdgpu_device *adev, switch (adev->ip_versions[ATHUB_HWIP][0]) { case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 1): case IP_VERSION(3, 0, 2): 
athub_v3_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -88,7 +120,7 @@ void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) int data; /* AMD_CG_SUPPORT_ATHUB_MGCG */ - data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + data = athub_v3_0_get_cg_cntl(adev); if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f61d7c7c9d262..8992e60c90b22 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4846,7 +4846,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 2; + adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 91a1c995df396..253ffa4c62be0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -126,6 +126,8 @@ MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin"); #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 @@ -1496,7 +1498,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) goto out; @@ -1509,7 +1515,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); if (!err) { err = amdgpu_ucode_validate(adev->gfx.mec2_fw); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 482380fe76db3..750bb16dda36d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -418,6 +418,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? 
SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -436,7 +437,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); return -ETIME; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4f4aaed3a0974..382d1a18f6f03 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -894,6 +894,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -932,7 +933,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); return -ETIME; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index 39a696cd45b5e..29c3484ae1f16 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -40,6 +40,156 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, 0); } +static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + uint32_t hdp_mem_pwr_cntl; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD))) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + + /* Before doing clock/power mode switch, forced on MEM clock */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 1); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); + + /* disable clock and power gating before any changing */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 0); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* Already disabled above. 
The actions below are for "enabled" only */ + if (enable) { + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + } + + /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + } + } + + /* disable MEM clock override after clock/power mode changing */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 0); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + + if (enable) { + hdp_clk_cntl &= + ~(uint32_t) + (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK); + } else { + hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK; + } + + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t tmp; + + /* AMD_CG_SUPPORT_HDP_MGCG */ + tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK))) + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + + /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ + tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_DS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK) + *flags |= 
AMD_CG_SUPPORT_HDP_SD; +} + +static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + hdp_v5_2_update_mem_power_gating(adev, enable); + hdp_v5_2_update_medium_grain_clock_gating(adev, enable); +} + const struct amdgpu_hdp_funcs hdp_v5_2_funcs = { .flush_hdp = hdp_v5_2_flush_hdp, + .update_clock_gating = hdp_v5_2_update_clock_gating, + .get_clock_gating_state = hdp_v5_2_get_clockgating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 92dc60a9d2094..085e613f3646d 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -727,6 +727,7 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = { static const struct amdgpu_ih_funcs ih_v6_0_funcs = { .get_wptr = ih_v6_0_get_wptr, .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, .set_rptr = ih_v6_0_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index cac72ced94c85..e8058edc1d108 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -518,18 +518,41 @@ static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev) static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { - //TODO + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); } static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable) { - //TODO + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); } static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state) { + if (amdgpu_sriov_vf(adev)) + return 0; + mmhub_v3_0_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); mmhub_v3_0_1_update_medium_grain_light_sleep(adev, @@ -539,7 +562,20 @@ static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev, static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags) { - //TODO + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + /* AMD_CG_SUPPORT_MC_MGCG */ + if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; } const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 4b5396d3e60f6..eec13cb5bf758 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -409,9 +409,11 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. 
*/ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -483,6 +485,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index a2588200ea580..0b2ac418e4ac4 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -101,6 +101,16 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) adev->psp.dtm_context.context.bin_desc.start_addr = (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr + le32_to_cpu(ta_hdr->dtm.offset_bytes); + + if (adev->apu_flags & AMD_APU_IS_RENOIR) { + adev->psp.securedisplay_context.context.bin_desc.fw_version = + le32_to_cpu(ta_hdr->securedisplay.fw_version); + adev->psp.securedisplay_context.context.bin_desc.size_bytes = + le32_to_cpu(ta_hdr->securedisplay.size_bytes); + adev->psp.securedisplay_context.context.bin_desc.start_addr = + (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr + + le32_to_cpu(ta_hdr->securedisplay.offset_bytes); + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 77d549dbe2a8c..1ff7fc7bb3400 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -577,7 +577,9 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | AMD_CG_SUPPORT_ATHUB_MGCG | - AMD_CG_SUPPORT_ATHUB_LS; + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_SD; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | @@ -594,6 +596,13 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_REPEATER_FGCG | AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = @@ -692,6 +701,7 @@ static int soc21_common_set_clockgating_state(void *handle, switch (adev->ip_versions[NBIO_HWIP][0]) { case IP_VERSION(4, 3, 0): + case IP_VERSION(4, 3, 1): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, @@ -699,6 +709,10 @@ static int soc21_common_set_clockgating_state(void *handle, adev->hdp.funcs->update_clock_gating(adev, state == AMD_CG_STATE_GATE); break; + case IP_VERSION(7, 7, 0): + adev->hdp.funcs->update_clock_gating(adev, + state == AMD_CG_STATE_GATE); + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index ca14c3ef742ec..fb2d74f304481 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1115,7 +1115,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) * * Stop VCN block with dpg mode */ -static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { uint32_t tmp; @@ -1133,7 +1133,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); - return 0; } /** @@ -1154,7 
+1153,7 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v4_0_stop_dpg_mode(adev, i); + vcn_v4_0_stop_dpg_mode(adev, i); continue; } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index cdd599a081258..03b7066471f9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -334,9 +334,11 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. */ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -409,6 +411,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 3b4eb8285943c..2022ffbb8dba5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -385,9 +385,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. 
*/ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -461,6 +463,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; diff --git a/drivers/gpu/drm/amd/amdkcl/Makefile b/drivers/gpu/drm/amd/amdkcl/Makefile index 114f3861ac5ff..36e1f19709ea1 100644 --- a/drivers/gpu/drm/amd/amdkcl/Makefile +++ b/drivers/gpu/drm/amd/amdkcl/Makefile @@ -13,7 +13,7 @@ amdkcl-y += kcl_backlight.o kcl_ioctl.o \ kcl_device_cgroup.o kcl_mn.o kcl_drm_modes.o kcl_time.o kcl_ftrace.o \ kcl_acpi_table.o kcl_page_alloc.o kcl_numa.o kcl_fs_read_write.o kcl_drm_aperture.o \ kcl_drm_drv.o kcl_drm_simple_kms_helper.o kcl_bitmap.o kcl_vmscan.o kcl_dma_fence_chain.o \ - kcl_mce_amd.o kcl_workqueue.o + kcl_mce_amd.o kcl_workqueue.o kcl_cpumask.o amdkcl-$(CONFIG_DRM_AMD_DC_HDCP) += kcl_drm_hdcp.o amdkcl-$(CONFIG_MMU_NOTIFIER) += kcl_mn.o diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c b/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c new file mode 100644 index 0000000000000..fe36b386ff52b --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#ifndef for_each_cpu_wrap +/* copied from lib/cpumask.c +/** + * cpumask_next_wrap - helper to implement for_each_cpu_wrap + * @n: the cpu prior to the place to search + * @mask: the cpumask pointer + * @start: the start point of the iteration + * @wrap: assume @n crossing @start terminates the iteration + * + * Returns >= nr_cpu_ids on completion + * + * Note: the @wrap argument is required for the start condition when + * we cannot assume @start is set in @mask. + */ +int _kcl_cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) +{ + int next; + +again: + next = cpumask_next(n, mask); + + if (wrap && n < start && next >= start) { + return nr_cpumask_bits; + + } else if (next >= nr_cpumask_bits) { + wrap = true; + n = -1; + goto again; + } + + return next; +} +EXPORT_SYMBOL(_kcl_cpumask_next_wrap); +#endif + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index d7fb67b63f4bf..f16b7206c858f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1150,7 +1150,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( dev->adev, args->va_addr, args->size, - pdd->drm_priv, NULL, (struct kgd_mem **) &mem, &offset, + pdd->drm_priv, (struct kgd_mem **) &mem, &offset, flags, false); if (err) @@ -1628,867 +1628,6 @@ static int kfd_ioctl_ipc_import_handle(struct file *filep, return r; } -#ifndef PTRACE_MODE_ATTACH_REALCREDS -#define PTRACE_MODE_ATTACH_REALCREDS PTRACE_MODE_ATTACH -#endif - -/* Maximum number of entries for process pages array which lives on stack */ -#define MAX_PP_STACK_COUNT 16 -/* Maximum number of pages kmalloc'd to hold struct page's during copy */ -#define MAX_KMALLOC_PAGES (PAGE_SIZE * 2) -#define MAX_PP_KMALLOC_COUNT (MAX_KMALLOC_PAGES/sizeof(struct page *)) - -static void kfd_put_sg_table(struct sg_table *sg) -{ - unsigned int i; - struct scatterlist *s; - - for_each_sg(sg->sgl, s, sg->nents, i) - put_page(sg_page(s)); -} - - -/* Create a sg table for the given userptr BO by pinning its system pages - * @bo: userptr BO - * @offset: Offset into BO - * @mm/@task: mm_struct & task_struct of the process that holds the BO - * @size: in/out: desired size / 
actual size which could be smaller - * @sg_size: out: Size of sg table. This is ALIGN_UP(@size) - * @ret_sg: out sg table - */ -static int kfd_create_sg_table_from_userptr_bo(struct kfd_bo *bo, - int64_t offset, int cma_write, - struct mm_struct *mm, - struct task_struct *task, - uint64_t *size, - uint64_t *sg_size, - struct sg_table **ret_sg) -{ - int ret, locked = 1; - struct sg_table *sg = NULL; - unsigned int i, offset_in_page, flags = 0; - unsigned long nents, n; - unsigned long pa = (bo->cpuva + offset) & PAGE_MASK; - unsigned int cur_page = 0; - struct scatterlist *s; - uint64_t sz = *size; - struct page **process_pages; - - *sg_size = 0; - sg = kmalloc(sizeof(*sg), GFP_KERNEL); - if (!sg) - return -ENOMEM; - - offset_in_page = offset & (PAGE_SIZE - 1); - nents = (sz + offset_in_page + PAGE_SIZE - 1) / PAGE_SIZE; - - ret = sg_alloc_table(sg, nents, GFP_KERNEL); - if (unlikely(ret)) { - ret = -ENOMEM; - goto sg_alloc_fail; - } - process_pages = kmalloc_array(nents, sizeof(struct pages *), - GFP_KERNEL); - if (!process_pages) { - ret = -ENOMEM; - goto page_alloc_fail; - } - - if (cma_write) - flags = FOLL_WRITE; - locked = 1; - mmap_read_lock(mm); - n = kcl_get_user_pages_remote(task, mm, pa, nents, flags, process_pages, - NULL, &locked); - if (locked) - mmap_read_unlock(mm); - if (n <= 0) { - pr_err("CMA: Invalid virtual address 0x%lx\n", pa); - ret = -EFAULT; - goto get_user_fail; - } - if (n != nents) { - /* Pages pinned < requested. Set the size accordingly */ - *size = (n * PAGE_SIZE) - offset_in_page; - pr_debug("Requested %lx but pinned %lx\n", nents, n); - } - - sz = 0; - for_each_sg(sg->sgl, s, n, i) { - sg_set_page(s, process_pages[cur_page], PAGE_SIZE, - offset_in_page); - sg_dma_address(s) = page_to_phys(process_pages[cur_page]); - offset_in_page = 0; - cur_page++; - sz += PAGE_SIZE; - } - *ret_sg = sg; - *sg_size = sz; - - kfree(process_pages); - return 0; - -get_user_fail: - kfree(process_pages); -page_alloc_fail: - sg_free_table(sg); -sg_alloc_fail: - kfree(sg); - return ret; -} - -static void kfd_free_cma_bos(struct cma_iter *ci) -{ - struct cma_system_bo *cma_bo, *tmp; - - list_for_each_entry_safe(cma_bo, tmp, &ci->cma_list, list) { - struct kfd_dev *dev = cma_bo->dev; - struct kfd_process_device *pdd; - - /* sg table is deleted by free_memory_of_gpu */ - if (cma_bo->sg) - kfd_put_sg_table(cma_bo->sg); - pdd = kfd_get_process_device_data(dev, ci->p); - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, cma_bo->mem, pdd->drm_priv, NULL); - list_del(&cma_bo->list); - kfree(cma_bo); - } -} - -/* 1 second timeout */ -#define CMA_WAIT_TIMEOUT msecs_to_jiffies(1000) - -static int kfd_cma_fence_wait(struct dma_fence *f) -{ - int ret; - - ret = dma_fence_wait_timeout(f, false, CMA_WAIT_TIMEOUT); - if (likely(ret > 0)) - return 0; - if (!ret) - ret = -ETIME; - return ret; -} - -/* Put previous (old) fence @pf but it waits for @pf to signal if the context - * of the current fence @cf is different. - */ -static int kfd_fence_put_wait_if_diff_context(struct dma_fence *cf, - struct dma_fence *pf) -{ - int ret = 0; - - if (pf && cf && cf->context != pf->context) - ret = kfd_cma_fence_wait(pf); - dma_fence_put(pf); - return ret; -} - -#define MAX_SYSTEM_BO_SIZE (512*PAGE_SIZE) - -/* Create an equivalent system BO for the given @bo. If @bo is a userptr then - * create a new system BO by pinning underlying system pages of the given - * userptr BO. If @bo is in Local Memory then create an empty system BO and - * then copy @bo into this new BO. 
- * @bo: Userptr BO or Local Memory BO - * @offset: Offset into bo - * @size: in/out: The size of the new BO could be less than requested if all - * the pages couldn't be pinned or size > MAX_SYSTEM_BO_SIZE. This would - * be reflected in @size - * @mm/@task: mm/task to which @bo belongs to - * @cma_bo: out: new system BO - */ -static int kfd_create_cma_system_bo(struct kfd_dev *kdev, struct kfd_bo *bo, - uint64_t *size, uint64_t offset, - int cma_write, struct kfd_process *p, - struct mm_struct *mm, - struct task_struct *task, - struct cma_system_bo **cma_bo) -{ - int ret; - struct kfd_process_device *pdd = NULL; - struct cma_system_bo *cbo; - uint64_t bo_size = 0; - struct dma_fence *f; - - uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | - KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE; - - *cma_bo = NULL; - cbo = kzalloc(sizeof(**cma_bo), GFP_KERNEL); - if (!cbo) - return -ENOMEM; - - INIT_LIST_HEAD(&cbo->list); - if (bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) - bo_size = min_t(uint64_t, *size, MAX_SYSTEM_BO_SIZE); - else if (bo->cpuva) { - ret = kfd_create_sg_table_from_userptr_bo(bo, offset, - cma_write, mm, task, - size, &bo_size, - &cbo->sg); - if (ret) { - pr_err("CMA: BO create with sg failed %d\n", ret); - goto sg_fail; - } - } else { - WARN_ON(1); - ret = -EINVAL; - goto sg_fail; - } - mutex_lock(&p->mutex); - pdd = kfd_get_process_device_data(kdev, p); - if (!pdd) { - mutex_unlock(&p->mutex); - pr_err("Process device data doesn't exist\n"); - ret = -EINVAL; - goto pdd_fail; - } - - ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, 0ULL, bo_size, - pdd->drm_priv, cbo->sg, - &cbo->mem, NULL, flags, - false); - mutex_unlock(&p->mutex); - if (ret) { - pr_err("Failed to create shadow system BO %d\n", ret); - goto pdd_fail; - } - - if (bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - ret = amdgpu_amdkfd_copy_mem_to_mem(kdev->adev, bo->mem, - offset, cbo->mem, 0, - bo_size, &f, size); - if (ret) { - pr_err("CMA: Intermediate copy failed %d\n", ret); - goto copy_fail; - } - - /* Wait for the copy to finish as subsequent copy will be done - * by different device - */ - ret = kfd_cma_fence_wait(f); - dma_fence_put(f); - if (ret) { - pr_err("CMA: Intermediate copy timed out %d\n", ret); - goto copy_fail; - } - } - - cbo->dev = kdev; - *cma_bo = cbo; - - return ret; - -copy_fail: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, bo->mem, pdd->drm_priv, NULL); -pdd_fail: - if (cbo->sg) { - kfd_put_sg_table(cbo->sg); - sg_free_table(cbo->sg); - kfree(cbo->sg); - } -sg_fail: - kfree(cbo); - return ret; -} - -/* Update cma_iter.cur_bo with KFD BO that is assocaited with - * cma_iter.array.va_addr - */ -static int kfd_cma_iter_update_bo(struct cma_iter *ci) -{ - struct kfd_memory_range *arr = ci->array; - uint64_t va_end = arr->va_addr + arr->size - 1; - - mutex_lock(&ci->p->mutex); - ci->cur_bo = kfd_process_find_bo_from_interval(ci->p, arr->va_addr, - va_end); - mutex_unlock(&ci->p->mutex); - - if (!ci->cur_bo || va_end > ci->cur_bo->it.last) { - pr_err("CMA failed. Range out of bounds\n"); - return -EFAULT; - } - return 0; -} - -/* Advance iter by @size bytes. */ -static int kfd_cma_iter_advance(struct cma_iter *ci, unsigned long size) -{ - int ret = 0; - - ci->offset += size; - if (WARN_ON(size > ci->total || ci->offset > ci->array->size)) - return -EFAULT; - ci->total -= size; - /* If current range is copied, move to next range if available. 
*/ - if (ci->offset == ci->array->size) { - - /* End of all ranges */ - if (!(--ci->nr_segs)) - return 0; - - ci->array++; - ci->offset = 0; - ret = kfd_cma_iter_update_bo(ci); - if (ret) - return ret; - } - ci->bo_offset = (ci->array->va_addr + ci->offset) - - ci->cur_bo->it.start; - return ret; -} - -static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs, - struct kfd_process *p, struct mm_struct *mm, - struct task_struct *task, struct cma_iter *ci) -{ - int ret; - int nr; - - if (!arr || !segs) - return -EINVAL; - - memset(ci, 0, sizeof(*ci)); - INIT_LIST_HEAD(&ci->cma_list); - ci->array = arr; - ci->nr_segs = segs; - ci->p = p; - ci->offset = 0; - ci->mm = mm; - ci->task = task; - for (nr = 0; nr < segs; nr++) - ci->total += arr[nr].size; - - /* Valid but size is 0. So copied will also be 0 */ - if (!ci->total) - return 0; - - ret = kfd_cma_iter_update_bo(ci); - if (!ret) - ci->bo_offset = arr->va_addr - ci->cur_bo->it.start; - return ret; -} - -static bool kfd_cma_iter_end(struct cma_iter *ci) -{ - if (!(ci->nr_segs) || !(ci->total)) - return true; - return false; -} - -/* Copies @size bytes from si->cur_bo to di->cur_bo BO. The function assumes - * both source and dest. BOs are userptr BOs. Both BOs can either belong to - * current process or one of the BOs can belong to a differnt - * process. @Returns 0 on success, -ve on failure - * - * @si: Source iter - * @di: Dest. iter - * @cma_write: Indicates if it is write to remote or read from remote - * @size: amount of bytes to be copied - * @copied: Return number of bytes actually copied. - */ -static int kfd_copy_userptr_bos(struct cma_iter *si, struct cma_iter *di, - bool cma_write, uint64_t size, - uint64_t *copied) -{ - int i, ret = 0, locked; - unsigned int nents, nl; - unsigned int offset_in_page; - struct page *pp_stack[MAX_PP_STACK_COUNT]; - struct page **process_pages = pp_stack; - unsigned long rva, lva = 0, flags = 0; - uint64_t copy_size, to_copy = size; - struct cma_iter *li, *ri; - - if (cma_write) { - ri = di; - li = si; - flags |= FOLL_WRITE; - } else { - li = di; - ri = si; - } - /* rva: remote virtual address. Page aligned to start page. - * rva + offset_in_page: Points to remote start address - * lva: local virtual address. Points to the start address. 
- * nents: computes number of remote pages to request - */ - offset_in_page = ri->bo_offset & (PAGE_SIZE - 1); - rva = (ri->cur_bo->cpuva + ri->bo_offset) & PAGE_MASK; - lva = li->cur_bo->cpuva + li->bo_offset; - - nents = (size + offset_in_page + PAGE_SIZE - 1) / PAGE_SIZE; - - copy_size = min_t(uint64_t, size, PAGE_SIZE - offset_in_page); - *copied = 0; - - if (nents > MAX_PP_STACK_COUNT) { - /* For reliability kmalloc only 2 pages worth */ - process_pages = kmalloc(min_t(size_t, MAX_KMALLOC_PAGES, - sizeof(struct pages *)*nents), - GFP_KERNEL); - - if (!process_pages) - return -ENOMEM; - } - - while (nents && to_copy) { - nl = min_t(unsigned int, MAX_PP_KMALLOC_COUNT, nents); - locked = 1; - mmap_read_lock(ri->mm); - nl = kcl_get_user_pages_remote(ri->task, ri->mm, rva, nl, - flags, process_pages, NULL, - &locked); - if (locked) - mmap_read_unlock(ri->mm); - if (nl <= 0) { - pr_err("CMA: Invalid virtual address 0x%lx\n", rva); - ret = -EFAULT; - break; - } - - for (i = 0; i < nl; i++) { - unsigned int n; - void *kaddr = kmap(process_pages[i]); - - if (cma_write) { - n = copy_from_user(kaddr+offset_in_page, - (void *)lva, copy_size); - set_page_dirty(process_pages[i]); - } else { - n = copy_to_user((void *)lva, - kaddr+offset_in_page, - copy_size); - } - kunmap(kaddr); - if (n) { - ret = -EFAULT; - break; - } - to_copy -= copy_size; - if (!to_copy) - break; - lva += copy_size; - rva += (copy_size + offset_in_page); - WARN_ONCE(rva & (PAGE_SIZE - 1), - "CMA: Error in remote VA computation"); - offset_in_page = 0; - copy_size = min_t(uint64_t, to_copy, PAGE_SIZE); - } - - for (i = 0; i < nl; i++) - put_page(process_pages[i]); - - if (ret) - break; - nents -= nl; - } - - if (process_pages != pp_stack) - kfree(process_pages); - - *copied = (size - to_copy); - return ret; - -} - -static int kfd_create_kgd_mem(struct kfd_dev *kdev, uint64_t size, - struct kfd_process *p, struct kgd_mem **mem) -{ - int ret; - struct kfd_process_device *pdd = NULL; - uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | - KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE; - - if (!mem || !size || !p || !kdev) - return -EINVAL; - - *mem = NULL; - - mutex_lock(&p->mutex); - pdd = kfd_get_process_device_data(kdev, p); - if (!pdd) { - mutex_unlock(&p->mutex); - pr_err("Process device data doesn't exist\n"); - return -EINVAL; - } - - ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, 0ULL, size, - pdd->drm_priv, NULL, - mem, NULL, flags, false); - mutex_unlock(&p->mutex); - if (ret) { - pr_err("Failed to create shadow system BO %d\n", ret); - return -EINVAL; - } - - return 0; -} - -static int kfd_destroy_kgd_mem(struct kgd_mem *mem) -{ - struct amdgpu_device *adev; - struct task_struct *task; - struct kfd_process *p; - struct kfd_process_device *pdd; - uint32_t gpu_id, gpu_idx; - int r; - - if (!mem) - return -EINVAL; - - adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); - task = get_pid_task(mem->process_info->pid, PIDTYPE_PID); - p = kfd_get_process(task); - r = kfd_process_gpuid_from_adev(p, adev, &gpu_id, &gpu_idx); - if (r < 0) { - pr_warn("no gpu id found, mem maybe leaking\n"); - return -EINVAL; - } - pdd = kfd_process_device_from_gpuidx(p, gpu_idx); - - /* param adev is not used*/ - return amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem, pdd->drm_priv, NULL); -} - -/* Copies @size bytes from si->cur_bo to di->cur_bo starting at their - * respective offset. - * @si: Source iter - * @di: Dest. 
iter - * @cma_write: Indicates if it is write to remote or read from remote - * @size: amount of bytes to be copied - * @f: Return the last fence if any - * @copied: Return number of bytes actually copied. - */ -static int kfd_copy_bos(struct cma_iter *si, struct cma_iter *di, - int cma_write, uint64_t size, - struct dma_fence **f, uint64_t *copied, - struct kgd_mem **tmp_mem) -{ - int err = 0; - struct kfd_bo *dst_bo = di->cur_bo, *src_bo = si->cur_bo; - uint64_t src_offset = si->bo_offset, dst_offset = di->bo_offset; - struct kgd_mem *src_mem = src_bo->mem, *dst_mem = dst_bo->mem; - struct kfd_dev *dev = dst_bo->dev; - int d2d = 0; - - *copied = 0; - if (f) - *f = NULL; - if (src_bo->cpuva && dst_bo->cpuva) - return kfd_copy_userptr_bos(si, di, cma_write, size, copied); - - /* If either source or dest. is userptr, create a shadow system BO - * by using the underlying userptr BO pages. Then use this shadow - * BO for copy. src_offset & dst_offset are adjusted because the new BO - * is only created for the window (offset, size) requested. - * The shadow BO is created on the other device. This means if the - * other BO is a device memory, the copy will be using that device. - * The BOs are stored in cma_list for deferred cleanup. This minimizes - * fence waiting just to the last fence. - */ - if (src_bo->cpuva) { - dev = dst_bo->dev; - err = kfd_create_cma_system_bo(dev, src_bo, &size, - si->bo_offset, cma_write, - si->p, si->mm, si->task, - &si->cma_bo); - src_mem = si->cma_bo->mem; - src_offset = si->bo_offset & (PAGE_SIZE - 1); - list_add_tail(&si->cma_bo->list, &si->cma_list); - } else if (dst_bo->cpuva) { - dev = src_bo->dev; - err = kfd_create_cma_system_bo(dev, dst_bo, &size, - di->bo_offset, cma_write, - di->p, di->mm, di->task, - &di->cma_bo); - dst_mem = di->cma_bo->mem; - dst_offset = di->bo_offset & (PAGE_SIZE - 1); - list_add_tail(&di->cma_bo->list, &di->cma_list); - } else if (src_bo->dev->adev != dst_bo->dev->adev) { - /* This indicates that atleast on of the BO is in local mem. - * If both are in local mem of different devices then create an - * intermediate System BO and do a double copy - * [VRAM]--gpu1-->[System BO]--gpu2-->[VRAM]. - * If only one BO is in VRAM then use that GPU to do the copy - */ - if (src_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM && - dst_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - dev = dst_bo->dev; - size = min_t(uint64_t, size, MAX_SYSTEM_BO_SIZE); - d2d = 1; - - if (*tmp_mem == NULL) { - if (kfd_create_kgd_mem(src_bo->dev, - MAX_SYSTEM_BO_SIZE, - si->p, - tmp_mem)) - return -EINVAL; - } - - if (amdgpu_amdkfd_copy_mem_to_mem(src_bo->dev->adev, - src_bo->mem, si->bo_offset, - *tmp_mem, 0, - size, f, &size)) - /* tmp_mem will be freed in caller.*/ - return -EINVAL; - - kfd_cma_fence_wait(*f); - dma_fence_put(*f); - - src_mem = *tmp_mem; - src_offset = 0; - } else if (src_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) - dev = src_bo->dev; - /* else already set to dst_bo->dev */ - } - - if (err) { - pr_err("Failed to create system BO %d", err); - return -EINVAL; - } - - err = amdgpu_amdkfd_copy_mem_to_mem(dev->adev, src_mem, src_offset, - dst_mem, dst_offset, size, f, - copied); - /* The tmp_bo allocates additional memory. So it is better to wait and - * delete. Also since multiple GPUs are involved the copies are - * currently not pipelined. - */ - if (*tmp_mem && d2d) { - if (!err) { - kfd_cma_fence_wait(*f); - dma_fence_put(*f); - *f = NULL; - } - } - return err; -} - -/* Copy single range from source iterator @si to destination iterator @di. 
- * @si will move to next range and @di will move by bytes copied. - * @return : 0 for success or -ve for failure - * @f: The last fence if any - * @copied: out: number of bytes copied - */ -static int kfd_copy_single_range(struct cma_iter *si, struct cma_iter *di, - bool cma_write, struct dma_fence **f, - uint64_t *copied, struct kgd_mem **tmp_mem) -{ - int err = 0; - uint64_t copy_size, n; - uint64_t size = si->array->size; - struct kfd_bo *src_bo = si->cur_bo; - struct dma_fence *lfence = NULL; - - if (!src_bo || !di || !copied) - return -EINVAL; - *copied = 0; - if (f) - *f = NULL; - - while (size && !kfd_cma_iter_end(di)) { - struct dma_fence *fence = NULL; - - copy_size = min(size, (di->array->size - di->offset)); - - err = kfd_copy_bos(si, di, cma_write, copy_size, - &fence, &n, tmp_mem); - if (err) { - pr_err("CMA %d failed\n", err); - break; - } - - if (fence) { - err = kfd_fence_put_wait_if_diff_context(fence, - lfence); - lfence = fence; - if (err) - break; - } - - size -= n; - *copied += n; - err = kfd_cma_iter_advance(si, n); - if (err) - break; - err = kfd_cma_iter_advance(di, n); - if (err) - break; - } - - if (f) - *f = dma_fence_get(lfence); - dma_fence_put(lfence); - - return err; -} - -static int kfd_ioctl_cross_memory_copy(struct file *filep, - struct kfd_process *local_p, void *data) -{ - struct kfd_ioctl_cross_memory_copy_args *args = data; - struct kfd_memory_range *src_array, *dst_array; - struct kfd_process *remote_p; - struct task_struct *remote_task; - struct mm_struct *remote_mm; - struct pid *remote_pid; - struct dma_fence *lfence = NULL; - uint64_t copied = 0, total_copied = 0; - struct cma_iter di, si; - const char *cma_op; - int err = 0; - struct kgd_mem *tmp_mem = NULL; - - /* Check parameters */ - if (args->src_mem_range_array == 0 || args->dst_mem_range_array == 0 || - args->src_mem_array_size == 0 || args->dst_mem_array_size == 0) - return -EINVAL; - args->bytes_copied = 0; - - /* Allocate space for source and destination arrays */ - src_array = kmalloc_array((args->src_mem_array_size + - args->dst_mem_array_size), - sizeof(struct kfd_memory_range), - GFP_KERNEL); - if (!src_array) - return -ENOMEM; - dst_array = &src_array[args->src_mem_array_size]; - - if (copy_from_user(src_array, (void __user *)args->src_mem_range_array, - args->src_mem_array_size * - sizeof(struct kfd_memory_range))) { - err = -EFAULT; - goto copy_from_user_fail; - } - if (copy_from_user(dst_array, (void __user *)args->dst_mem_range_array, - args->dst_mem_array_size * - sizeof(struct kfd_memory_range))) { - err = -EFAULT; - goto copy_from_user_fail; - } - - /* Get remote process */ - remote_pid = find_get_pid(args->pid); - if (!remote_pid) { - pr_err("Cross mem copy failed. Invalid PID %d\n", args->pid); - err = -ESRCH; - goto copy_from_user_fail; - } - - remote_task = get_pid_task(remote_pid, PIDTYPE_PID); - if (!remote_pid) { - pr_err("Cross mem copy failed. Invalid PID or task died %d\n", - args->pid); - err = -ESRCH; - goto get_pid_task_fail; - } - - /* Check access permission */ - remote_mm = mm_access(remote_task, PTRACE_MODE_ATTACH_REALCREDS); - if (!remote_mm || IS_ERR(remote_mm)) { - err = IS_ERR(remote_mm) ? PTR_ERR(remote_mm) : -ESRCH; - if (err == -EACCES) { - pr_err("Cross mem copy failed. Permission error\n"); - err = -EPERM; - } else - pr_err("Cross mem copy failed. Invalid task %d\n", - err); - goto mm_access_fail; - } - - remote_p = kfd_get_process(remote_task); - if (IS_ERR(remote_p)) { - pr_err("Cross mem copy failed. 
Invalid kfd process %d\n", - args->pid); - err = -EINVAL; - goto kfd_process_fail; - } - /* Initialise cma_iter si & @di with source & destination range. */ - if (KFD_IS_CROSS_MEMORY_WRITE(args->flags)) { - cma_op = "WRITE"; - pr_debug("CMA WRITE: local -> remote\n"); - err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size, - remote_p, remote_mm, remote_task, &di); - if (err) - goto kfd_process_fail; - err = kfd_cma_iter_init(src_array, args->src_mem_array_size, - local_p, current->mm, current, &si); - if (err) - goto kfd_process_fail; - } else { - cma_op = "READ"; - pr_debug("CMA READ: remote -> local\n"); - - err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size, - local_p, current->mm, current, &di); - if (err) - goto kfd_process_fail; - err = kfd_cma_iter_init(src_array, args->src_mem_array_size, - remote_p, remote_mm, remote_task, &si); - if (err) - goto kfd_process_fail; - } - - /* Copy one si range at a time into di. After each call to - * kfd_copy_single_range() si will move to next range. di will be - * incremented by bytes copied - */ - while (!kfd_cma_iter_end(&si) && !kfd_cma_iter_end(&di)) { - struct dma_fence *fence = NULL; - - err = kfd_copy_single_range(&si, &di, - KFD_IS_CROSS_MEMORY_WRITE(args->flags), - &fence, &copied, &tmp_mem); - total_copied += copied; - - if (err) - break; - - /* Release old fence if a later fence is created. If no - * new fence is created, then keep the preivous fence - */ - if (fence) { - err = kfd_fence_put_wait_if_diff_context(fence, - lfence); - lfence = fence; - if (err) - break; - } - } - - /* Wait for the last fence irrespective of error condition */ - if (lfence) { - err = kfd_cma_fence_wait(lfence); - dma_fence_put(lfence); - if (err) - pr_err("CMA %s failed. BO timed out\n", cma_op); - } - - if (tmp_mem) - kfd_destroy_kgd_mem(tmp_mem); - - kfd_free_cma_bos(&si); - kfd_free_cma_bos(&di); - -kfd_process_fail: - mmput(remote_mm); -mm_access_fail: - put_task_struct(remote_task); -get_pid_task_fail: - put_pid(remote_pid); -copy_from_user_fail: - kfree(src_array); - - /* An error could happen after partial copy. 
In that case this will - * reflect partial amount of bytes copied - */ - args->bytes_copied = total_copied; - return err; -} - static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, struct kfd_process *p, void *data) { @@ -3447,7 +2586,7 @@ static int criu_restore_memory_of_gpu_ipc(struct kfd_process_device *pdd, */ ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(dev->adev, bo_bucket->addr, bo_bucket->size, pdd->drm_priv, - NULL, kgd_mem, &offset, + kgd_mem, &offset, bo_bucket->alloc_flags, true); if (ret) { pr_err("Could not create the BO\n"); @@ -3538,7 +2677,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, } /* Create the BO */ ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr, - bo_bucket->size, pdd->drm_priv, NULL, kgd_mem, + bo_bucket->size, pdd->drm_priv, kgd_mem, &offset, bo_bucket->alloc_flags, criu_resume); if (ret) { pr_err("Could not create the BO\n"); @@ -4086,9 +3225,6 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_IPC_EXPORT_HANDLE, kfd_ioctl_ipc_export_handle, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_CROSS_MEMORY_COPY, - kfd_ioctl_cross_memory_copy, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP, kfd_ioctl_dbg_set_debug_trap, 0), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 07b9fe4016a60..bc99d2d490abf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" @@ -102,13 +103,18 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) switch (sdma_version) { case IP_VERSION(6, 0, 0): - case IP_VERSION(6, 0, 1): case IP_VERSION(6, 0, 2): /* Reserve 1 for paging and 1 for gfx */ kfd->device_info.num_reserved_sdma_queues_per_engine = 2; /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */ kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL; break; + case IP_VERSION(6, 0, 1): + /* Reserve 1 for paging and 1 for gfx */ + kfd->device_info.num_reserved_sdma_queues_per_engine = 2; + /* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... 
*/ + kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL; + break; default: break; } @@ -814,13 +820,24 @@ static inline void kfd_queue_work(struct workqueue_struct *wq, struct work_struct *work) { int cpu, new_cpu; + const struct cpumask *mask = NULL; cpu = new_cpu = smp_processor_id(); - do { - new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; - if (cpu_to_node(new_cpu) == numa_node_id()) + +#if defined(CONFIG_SCHED_SMT) + /* CPU threads in the same core */ + mask = cpu_smt_mask(cpu); +#endif + if (!mask || cpumask_weight(mask) <= 1) + /* CPU threads in the same NUMA node */ + mask = cpu_cpu_mask(cpu); + /* Pick the next online CPU thread in the same core or NUMA node */ + for_each_cpu_wrap(cpu, mask, cpu+1) { + if (cpu != new_cpu && cpu_online(cpu)) { + new_cpu = cpu; break; - } while (cpu != new_cpu); + } + } queue_work_on(new_cpu, wq, work); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index db2a8a070b695..182eb67edbc52 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -383,38 +383,6 @@ struct kfd_bo { unsigned int mem_type; }; -struct cma_system_bo { - struct kgd_mem *mem; - struct sg_table *sg; - struct kfd_dev *dev; - struct list_head list; -}; - -/* Similar to iov_iter */ -struct cma_iter { - /* points to current entry of range array */ - struct kfd_memory_range *array; - /* total number of entries in the initial array */ - unsigned long nr_segs; - /* total amount of data pointed by kfd array*/ - unsigned long total; - /* offset into the entry pointed by cma_iter.array */ - unsigned long offset; - struct kfd_process *p; - struct mm_struct *mm; - struct task_struct *task; - /* current kfd_bo associated with cma_iter.array.va_addr */ - struct kfd_bo *cur_bo; - /* offset w.r.t cur_bo */ - unsigned long bo_offset; - /* If cur_bo is a userptr BO, then a shadow system BO is created - * using its underlying pages. cma_bo holds this BO. 
cma_list is a - * list cma_bos created in one session - */ - struct cma_system_bo *cma_bo; - struct list_head cma_list; -}; - enum kfd_mempool { KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 43697b3e4c9c2..5198dd636765a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -717,7 +717,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, int err; err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size, - pdd->drm_priv, NULL, mem, NULL, + pdd->drm_priv, mem, NULL, flags, false); if (err) goto err_alloc_mem; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 4b9d2a15fb085..e5708de28ff3b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -545,7 +545,6 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, kfree(svm_bo); return -ESRCH; } - svm_bo->svms = prange->svms; svm_bo->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), mm, @@ -3277,7 +3276,6 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) static void svm_range_evict_svm_bo_worker(struct work_struct *work) { struct svm_range_bo *svm_bo; - struct kfd_process *p; struct mm_struct *mm; int r = 0; @@ -3285,13 +3283,12 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) if (!svm_bo_ref_unless_zero(svm_bo)) return; /* svm_bo was freed while eviction was pending */ - /* svm_range_bo_release destroys this worker thread. So during - * the lifetime of this thread, kfd_process and mm will be valid. - */ - p = container_of(svm_bo->svms, struct kfd_process, svms); - mm = p->mm; - if (!mm) + if (mmget_not_zero(svm_bo->eviction_fence->mm)) { + mm = svm_bo->eviction_fence->mm; + } else { + svm_range_bo_unref(svm_bo); return; + } mmap_read_lock(mm); spin_lock(&svm_bo->list_lock); @@ -3309,8 +3306,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) mutex_lock(&prange->migrate_mutex); do { - r = svm_migrate_vram_to_ram(prange, - svm_bo->eviction_fence->mm, + r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_TTM_EVICTION); } while (!r && prange->actual_loc && --retries); @@ -3328,6 +3324,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) } spin_unlock(&svm_bo->list_lock); mmap_read_unlock(mm); + mmput(mm); dma_fence_signal(&svm_bo->eviction_fence->base); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 9156b041ef175..cfac13ad06ef0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -46,7 +46,6 @@ struct svm_range_bo { spinlock_t list_lock; struct amdgpu_amdkfd_fence *eviction_fence; struct work_struct eviction_work; - struct svm_range_list *svms; uint32_t evicting; struct work_struct release_work; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 779d3f2374664..f396b28327808 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1494,8 +1494,8 @@ static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev, static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node) { + struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link; struct kfd_iolink_properties *props = NULL, *props2 = NULL; 
- struct kfd_iolink_properties *gpu_link, *cpu_link; struct kfd_topology_device *cpu_dev; int ret = 0; int i, num_cpu; @@ -1518,16 +1518,19 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g continue; /* find CPU <--> CPU links */ + cpu_link = NULL; cpu_dev = kfd_topology_device_by_proximity_domain(i); if (cpu_dev) { - list_for_each_entry(cpu_link, + list_for_each_entry(tmp_link, &cpu_dev->io_link_props, list) { - if (cpu_link->node_to == gpu_link->node_to) + if (tmp_link->node_to == gpu_link->node_to) { + cpu_link = tmp_link; break; + } } } - if (cpu_link->node_to != gpu_link->node_to) + if (!cpu_link) return -ENOMEM; /* CPU <--> CPU <--> GPU, GPU node*/ diff --git a/drivers/gpu/drm/amd/backport/backport.h b/drivers/gpu/drm/amd/backport/backport.h index 1992b972a1bdd..b89f0fe0664ee 100644 --- a/drivers/gpu/drm/amd/backport/backport.h +++ b/drivers/gpu/drm/amd/backport/backport.h @@ -105,5 +105,6 @@ #include #include #include +#include #endif /* AMDGPU_BACKPORT_H */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 24bd6b469a00e..191b1f0fd3452 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -818,14 +818,18 @@ bool dm_helpers_dp_write_dsc_enable( const struct dc_stream_state *stream, bool enable) { +#if defined(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX) static const uint8_t DSC_DISABLE; static const uint8_t DSC_DECODING = 0x01; static const uint8_t DSC_PASSTHROUGH = 0x02; - struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_port *port; uint8_t enable_dsc = enable ? DSC_DECODING : DSC_DISABLE; uint8_t enable_passthrough = enable ? DSC_PASSTHROUGH : DSC_DISABLE; +#else + uint8_t enable_dsc = enable ? 1 : 0; +#endif + struct amdgpu_dm_connector *aconnector; uint8_t ret = 0; if (!stream) @@ -844,7 +848,7 @@ bool dm_helpers_dp_write_dsc_enable( return write_dsc_enable_synaptics_non_virtual_dpcd_mst( aconnector->dsc_aux, stream, enable_dsc); #endif - +#if defined(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX) port = aconnector->port; if (enable) { @@ -878,6 +882,11 @@ bool dm_helpers_dp_write_dsc_enable( ret); } } +#else + ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); + DC_LOG_DC("Send DSC %s to MST RX\n", enable_dsc ? 
"enable" : "disable"); +#endif + } if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_EDP) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 1b9ccce0a78e4..c706b2d08b956 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1568,8 +1568,6 @@ bool pre_validate_dsc(struct drm_atomic_state *state, return (ret == 0); } -#endif - static unsigned int kbps_from_pbn(unsigned int pbn) { unsigned int kbps = pbn; @@ -1597,17 +1595,20 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream, return bw_range->max_target_bpp_x16 && bw_range->min_target_bpp_x16; } +#endif /* CONFIG_DRM_AMD_DC_DCN */ enum dc_status dm_dp_mst_is_port_support_mode( struct amdgpu_dm_connector *aconnector, struct dc_stream_state *stream) { + int bpp, pbn, branch_max_throughput_mps = 0; +#if defined(CONFIG_DRM_AMD_DC_DCN) +#ifdef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX struct dc_link_settings cur_link_settings; unsigned int end_to_end_bw_in_kbps = 0; unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; unsigned int max_compressed_bw_in_kbps = 0; struct dc_dsc_bw_range bw_range = {0}; - int bpp, pbn, branch_max_throughput_mps = 0; /* * check if the mode could be supported if DSC pass-through is supported @@ -1642,13 +1643,19 @@ enum dc_status dm_dp_mst_is_port_support_mode( return DC_FAIL_BANDWIDTH_VALIDATE; } } else { +#endif +#endif /* check if mode could be supported within full_pbn */ bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); if (pbn > aconnector->port->full_pbn) return DC_FAIL_BANDWIDTH_VALIDATE; +#if defined(CONFIG_DRM_AMD_DC_DCN) +#ifdef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX } +#endif +#endif /* check is mst dsc output bandwidth branch_overall_throughput_0_mps */ switch (stream->timing.pixel_encoding) { diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c index 6767fab55c260..352e9afb85c6d 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c +++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c @@ -100,3 +100,24 @@ void convert_float_matrix( matrix[i] = (uint16_t)reg_value; } } + +static uint32_t find_gcd(uint32_t a, uint32_t b) +{ + uint32_t remainder = 0; + while (b != 0) { + remainder = a % b; + a = b; + b = remainder; + } + return a; +} + +void reduce_fraction(uint32_t num, uint32_t den, + uint32_t *out_num, uint32_t *out_den) +{ + uint32_t gcd = 0; + + gcd = find_gcd(num, den); + *out_num = num / gcd; + *out_den = den / gcd; +} diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.h b/drivers/gpu/drm/amd/display/dc/basics/conversion.h index ade785c4fdc7d..81da4e6f7a1ac 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/conversion.h +++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.h @@ -38,6 +38,9 @@ void convert_float_matrix( struct fixed31_32 *flt, uint32_t buffer_size); +void reduce_fraction(uint32_t num, uint32_t den, + uint32_t *out_num, uint32_t *out_den); + static inline unsigned int log_2(unsigned int num) { return ilog2(num); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 7af19823a29db..beb025cd3dc29 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c 
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -415,7 +415,7 @@ static struct wm_table lpddr5_wm_table = { } }; -static DpmClocks_t dummy_clocks; +static DpmClocks314_t dummy_clocks; static struct dcn314_watermarks dummy_wms = { 0 }; @@ -500,7 +500,7 @@ static void dcn314_notify_wm_ranges(struct clk_mgr *clk_mgr_base) static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, struct dcn314_smu_dpm_clks *smu_dpm_clks) { - DpmClocks_t *table = smu_dpm_clks->dpm_clks; + DpmClocks314_t *table = smu_dpm_clks->dpm_clks; if (!clk_mgr->smu_ver) return; @@ -517,6 +517,26 @@ static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, dcn314_smu_transfer_dpm_table_smu_2_dram(clk_mgr); } +static inline bool is_valid_clock_value(uint32_t clock_value) +{ + return clock_value > 1 && clock_value < 100000; +} + +static unsigned int convert_wck_ratio(uint8_t wck_ratio) +{ + switch (wck_ratio) { + case WCK_RATIO_1_2: + return 2; + + case WCK_RATIO_1_4: + return 4; + + default: + break; + } + return 1; +} + static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks) { uint32_t max = 0; @@ -530,89 +550,127 @@ static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks) return max; } -static unsigned int find_clk_for_voltage( - const DpmClocks_t *clock_table, - const uint32_t clocks[], - unsigned int voltage) -{ - int i; - int max_voltage = 0; - int clock = 0; - - for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) { - if (clock_table->SocVoltage[i] == voltage) { - return clocks[i]; - } else if (clock_table->SocVoltage[i] >= max_voltage && - clock_table->SocVoltage[i] < voltage) { - max_voltage = clock_table->SocVoltage[i]; - clock = clocks[i]; - } - } - - ASSERT(clock); - return clock; -} - static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr, struct integrated_info *bios_info, - const DpmClocks_t *clock_table) + const DpmClocks314_t *clock_table) { - int i, j; struct clk_bw_params *bw_params = clk_mgr->base.bw_params; - uint32_t max_dispclk = 0, max_dppclk = 0; - - j = -1; - - ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL); - - /* Find lowest DPM, FCLK is filled in reverse order*/ + struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; + uint32_t max_pstate = 0, max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0; + int i; - for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) { - if (clock_table->DfPstateTable[i].FClk != 0) { - j = i; - break; + /* Find highest valid fclk pstate */ + for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) { + if (is_valid_clock_value(clock_table->DfPstateTable[i].FClk) && + clock_table->DfPstateTable[i].FClk > max_fclk) { + max_fclk = clock_table->DfPstateTable[i].FClk; + max_pstate = i; } } - if (j == -1) { - /* clock table is all 0s, just use our own hardcode */ - ASSERT(0); - return; - } - - bw_params->clk_table.num_entries = j + 1; + /* We expect the table to contain at least one valid fclk entry. 
*/ + ASSERT(is_valid_clock_value(max_fclk)); - /* dispclk and dppclk can be max at any voltage, same number of levels for both */ + /* Dispclk and dppclk can be max at any voltage, same number of levels for both */ if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) { max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); } else { + /* Invalid number of entries in the table from PMFW. */ ASSERT(0); } - for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) { - bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk; - bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk; - bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage; - switch (clock_table->DfPstateTable[j].WckRatio) { - case WCK_RATIO_1_2: - bw_params->clk_table.entries[i].wck_ratio = 2; - break; - case WCK_RATIO_1_4: - bw_params->clk_table.entries[i].wck_ratio = 4; - break; - default: - bw_params->clk_table.entries[i].wck_ratio = 1; + /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */ + for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + uint32_t min_fclk = clock_table->DfPstateTable[0].FClk; + int j; + + for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) { + if (is_valid_clock_value(clock_table->DfPstateTable[j].FClk) && + clock_table->DfPstateTable[j].FClk < min_fclk && + clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]) { + min_fclk = clock_table->DfPstateTable[j].FClk; + min_pstate = j; + } } - bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage); - bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage); + + /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */ + for (j = bw_params->clk_table.num_entries - 1; j > 0; j--) + if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i]) + break; + + bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz; + bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz; + bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz; + + /* Now update clocks we do read */ + bw_params->clk_table.entries[i].fclk_mhz = min_fclk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i]; + bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i]; + bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; + bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; + bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio( + clock_table->DfPstateTable[min_pstate].WckRatio); + }; + + /* Make sure to include at least one entry at highest pstate */ + if (max_pstate != min_pstate || i == 0) { + if (i > MAX_NUM_DPM_LVL - 1) + i = MAX_NUM_DPM_LVL - 1; + + bw_params->clk_table.entries[i].fclk_mhz = max_fclk; + bw_params->clk_table.entries[i].memclk_mhz = 
clock_table->DfPstateTable[max_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; + bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio( + clock_table->DfPstateTable[max_pstate].WckRatio); + i++; } + bw_params->clk_table.num_entries = i--; + + /* Make sure all highest clocks are included*/ + bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS); + ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS)); + bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz; + bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz; + bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; + /* + * Set any 0 clocks to max default setting. Not an issue for + * power since we aren't doing switching in such case anyway + */ + for (i = 0; i < bw_params->clk_table.num_entries; i++) { + if (!bw_params->clk_table.entries[i].fclk_mhz) { + bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz; + bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz; + bw_params->clk_table.entries[i].voltage = def_max.voltage; + } + if (!bw_params->clk_table.entries[i].dcfclk_mhz) + bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz) + bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz; + if (!bw_params->clk_table.entries[i].dispclk_mhz) + bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz; + if (!bw_params->clk_table.entries[i].dppclk_mhz) + bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz; + if (!bw_params->clk_table.entries[i].phyclk_mhz) + bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz; + if (!bw_params->clk_table.entries[i].phyclk_d18_mhz) + bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz; + if (!bw_params->clk_table.entries[i].dtbclk_mhz) + bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; + } + ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); bw_params->vram_type = bios_info->memory_type; - bw_params->num_channels = bios_info->ma_channel_number; + bw_params->num_channels = bios_info->ma_channel_number ? 
bios_info->ma_channel_number : 4; for (i = 0; i < WM_SET_COUNT; i++) { bw_params->wm_table.entries[i].wm_inst = i; @@ -671,10 +729,10 @@ void dcn314_clk_mgr_construct( } ASSERT(clk_mgr->smu_wm_set.wm_set); - smu_dpm_clks.dpm_clks = (DpmClocks_t *)dm_helpers_allocate_gpu_mem( + smu_dpm_clks.dpm_clks = (DpmClocks314_t *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER, - sizeof(DpmClocks_t), + sizeof(DpmClocks314_t), &smu_dpm_clks.mc_address.quad_part); if (smu_dpm_clks.dpm_clks == NULL) { @@ -719,7 +777,7 @@ void dcn314_clk_mgr_construct( if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { dcn314_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks); - if (ctx->dc_bios && ctx->dc_bios->integrated_info) { + if (ctx->dc_bios && ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) { dcn314_clk_mgr_helper_populate_bw_params( &clk_mgr->base, ctx->dc_bios->integrated_info, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h index a7958dc965810..047d19ea919c7 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h @@ -36,6 +36,37 @@ typedef enum { WCK_RATIO_MAX } WCK_RATIO_e; +typedef struct { + uint32_t FClk; + uint32_t MemClk; + uint32_t Voltage; + uint8_t WckRatio; + uint8_t Spare[3]; +} DfPstateTable314_t; + +//Freq in MHz +//Voltage in milli volts with 2 fractional bits +typedef struct { + uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS]; + uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS]; + uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS]; + uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS]; + uint32_t VClocks[NUM_VCN_DPM_LEVELS]; + uint32_t DClocks[NUM_VCN_DPM_LEVELS]; + uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS]; + DfPstateTable314_t DfPstateTable[NUM_DF_PSTATE_LEVELS]; + + uint8_t NumDcfClkLevelsEnabled; + uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk + uint8_t NumSocClkLevelsEnabled; + uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk + uint8_t NumDfPstatesEnabled; + uint8_t spare[3]; + + uint32_t MinGfxClk; + uint32_t MaxGfxClk; +} DpmClocks314_t; + struct dcn314_watermarks { // Watermarks WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES]; @@ -43,7 +74,7 @@ struct dcn314_watermarks { }; struct dcn314_smu_dpm_clks { - DpmClocks_t *dpm_clks; + DpmClocks314_t *dpm_clks; union large_integer mc_address; }; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index cc076621f5e66..bff0f57e7fe61 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -507,7 +507,7 @@ static void dcn315_clk_mgr_helper_populate_bw_params( bw_params->clk_table.entries[i].dispclk_mhz = clock_table->DispClocks[i]; bw_params->clk_table.entries[i].dppclk_mhz = clock_table->DppClocks[i]; bw_params->clk_table.entries[i].wck_ratio = 1; - }; + } /* Make sure to include at least one entry and highest pstate */ if (max_pstate != min_pstate || i == 0) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index cf99097887076..719ba38a10b8a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1081,8 +1081,15 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) struct dc_stream_state 
*old_stream = dc->current_state->res_ctx.pipe_ctx[i].stream; bool should_disable = true; - bool pipe_split_change = - context->res_ctx.pipe_ctx[i].top_pipe != dc->current_state->res_ctx.pipe_ctx[i].top_pipe; + bool pipe_split_change = false; + + if ((context->res_ctx.pipe_ctx[i].top_pipe) && + (dc->current_state->res_ctx.pipe_ctx[i].top_pipe)) + pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe->pipe_idx != + dc->current_state->res_ctx.pipe_ctx[i].top_pipe->pipe_idx; + else + pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe != + dc->current_state->res_ctx.pipe_ctx[i].top_pipe; for (j = 0; j < context->stream_count; j++) { if (old_stream == context->streams[j]) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6621f608b5a98..14361bc91ccb1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.197" +#define DC_VER "3.2.198" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -404,6 +404,7 @@ struct dc_config { bool use_pipe_ctx_sync_logic; bool ignore_dpref_ss; bool enable_mipi_converter_optimization; + bool use_default_clock_table; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index c8059c28ac494..09b304507badb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -29,6 +29,7 @@ #include "dm_helpers.h" #include "dc_hw_types.h" #include "core_types.h" +#include "../basics/conversion.h" #define CTX dc_dmub_srv->ctx #define DC_LOGGER CTX->logger @@ -600,6 +601,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index]; struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing; struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + uint32_t out_num, out_den; pipe_data->mode = SUBVP; pipe_data->pipe_config.subvp_data.pix_clk_100hz = subvp_pipe->stream->timing.pix_clk_100hz; @@ -613,6 +615,15 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->pipe_idx; pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param; + /* Calculate the scaling factor from the src and dst height. + * e.g. If 3840x2160 being downscaled to 1920x1080, the scaling factor is 1/2. 
+ * Reduce the fraction 1080/2160 = 1/2 for the "scaling factor" + */ + reduce_fraction(subvp_pipe->stream->src.height, subvp_pipe->stream->dst.height, &out_num, &out_den); + // TODO: Uncomment below lines once DMCUB include headers are promoted + //pipe_data->pipe_config.subvp_data.scale_factor_numerator = out_num; + //pipe_data->pipe_config.subvp_data.scale_factor_denominator = out_den; + // Prefetch lines is equal to VACTIVE + BP + VSYNC pipe_data->pipe_config.subvp_data.prefetch_lines = phantom_timing->v_total - phantom_timing->v_front_porch; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index f8cf7e8d4bef8..49f2f46e0f652 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -543,9 +543,11 @@ static void dce112_get_pix_clk_dividers_helper ( switch (pix_clk_params->color_depth) { case COLOR_DEPTH_101010: actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2; + actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; break; case COLOR_DEPTH_121212: actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2; + actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; break; case COLOR_DEPTH_161616: actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index d4a6504dfe000..db7ca4b0cdb9d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -361,8 +361,6 @@ void dpp1_cnv_setup ( select = INPUT_CSC_SELECT_ICSC; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - pixel_format = 22; - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: pixel_format = 26; /* ARGB16161616_UNORM */ break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index b54c124003237..564e061ccb589 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -278,9 +278,6 @@ void hubp1_program_pixel_format( SURFACE_PIXEL_FORMAT, 10); break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - REG_UPDATE(DCSURF_SURFACE_CONFIG, - SURFACE_PIXEL_FORMAT, 22); - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/ REG_UPDATE(DCSURF_SURFACE_CONFIG, SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 0877ab143b98b..1466c156847ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -1086,7 +1086,7 @@ static void optc1_set_test_pattern( src_color[index] >> (src_bpc - dst_bpc); /* CRTC_TEST_PATTERN_DATA has 16 bits, * lowest 6 are hardwired to ZERO - * color bits should be left aligned aligned to MSB + * color bits should be left aligned to MSB * XXXXXXXXXX000000 for 10 bit, * XXXXXXXX00000000 for 8 bit and XXXXXX0000000000 for 6 */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index ea1f14af0db75..eaa7032f0f1a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -166,8 +166,6 @@ static void dpp2_cnv_setup ( select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - pixel_format = 22; - break; 
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: pixel_format = 26; /* ARGB16161616_UNORM */ break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 936af65381ef7..9570c2118ccc7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -463,9 +463,6 @@ void hubp2_program_pixel_format( SURFACE_PIXEL_FORMAT, 10); break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - REG_UPDATE(DCSURF_SURFACE_CONFIG, - SURFACE_PIXEL_FORMAT, 22); - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/ REG_UPDATE(DCSURF_SURFACE_CONFIG, SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c index c5e200d09038f..5752271f22dfe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c @@ -67,9 +67,15 @@ static uint32_t convert_and_clamp( void dcn21_dchvm_init(struct hubbub *hubbub) { struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); - uint32_t riommu_active; + uint32_t riommu_active, prefetch_done; int i; + REG_GET(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, &prefetch_done); + + if (prefetch_done) { + hubbub->riommu_active = true; + return; + } //Init DCHVM block REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index 77b00f86c2165..4a668d6563dfd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -244,8 +244,6 @@ void dpp3_cnv_setup ( select = INPUT_CSC_SELECT_ICSC; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - pixel_format = 22; - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: pixel_format = 26; /* ARGB16161616_UNORM */ break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c index 6df6598d217dc..d4b488b1f9ffe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c @@ -186,14 +186,6 @@ void optc3_set_dsc_config(struct timing_generator *optc, } #endif -void optc3_set_vrr_m_const(struct timing_generator *optc, - double vtotal_avg) -{ - DC_FP_START(); - optc3_fpu_set_vrr_m_const(optc, vtotal_avg); - DC_FP_END(); -} - void optc3_set_odm_bypass(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h index 62f0ba7a3dd03..c305c69f2d134 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h @@ -329,9 +329,6 @@ void optc3_lock_doublebuffer_enable(struct timing_generator *optc); void optc3_lock_doublebuffer_disable(struct timing_generator *optc); -void optc3_set_vrr_m_const(struct timing_generator *optc, - double vtotal_avg); - void optc3_set_drr_trigger_window(struct timing_generator *optc, uint32_t window_start, uint32_t window_end); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index bb815654dbd22..6f1aef3d2bf85 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -332,7 +332,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc 
*dc, struct dc_state *c bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) { union dmub_rb_cmd cmd; - uint8_t ways, i, j; + uint8_t ways, i; + int j; bool stereo_in_use = false; struct dc_plane_state *plane = NULL; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index bd189802c7902..a222e56594e5c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -281,7 +281,6 @@ static struct timing_generator_funcs dcn32_tg_funcs = { .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, .enable_optc_clock = optc1_enable_optc_clock, - .set_vrr_m_const = optc3_set_vrr_m_const, .set_drr = optc32_set_drr, .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc3_set_vtotal_min_max, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index fba95082b5c36..e59aad43ea0b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -63,7 +63,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat if (pipe->stream && pipe->plane_state && !pipe->top_pipe && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; - mall_region_pixels = pipe->stream->timing.h_addressable * pipe->stream->timing.v_addressable; + mall_region_pixels = pipe->plane_state->plane_size.surface_pitch * pipe->stream->timing.v_addressable; // For bytes required in MALL, calculate based on number of MBlks required num_mblks = (mall_region_pixels * bytes_per_pixel + diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index ef1eee2e54d9a..795d6fb0eaa97 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -1669,7 +1669,7 @@ static bool dcn321_resource_construct( dc->caps.subvp_prefetch_end_to_mall_start_us = 15; dc->caps.subvp_swath_height_margin_lines = 16; dc->caps.subvp_pstate_allow_width_us = 20; - + dc->caps.subvp_vertical_int_margin_us = 30; dc->caps.max_slave_planes = 1; dc->caps.max_slave_yuv_planes = 1; dc->caps.max_slave_rgb_planes = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c index 6ca288fb5fb9e..3aa8dd0acd5e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c @@ -25,12 +25,11 @@ #include "dm_services.h" #include "bw_fixed.h" +#define MAX_I64 \ + ((int64_t)((1ULL << 63) - 1)) #define MIN_I64 \ - (int64_t)(-(1LL << 63)) - -#define MAX_I64 \ - (int64_t)((1ULL << 63) - 1) + (-MAX_I64 - 1) #define FRACTIONAL_PART_MASK \ ((1ULL << BW_FIXED_BITS_PER_FRACTIONAL_PART) - 1) @@ -49,6 +48,7 @@ static uint64_t abs_i64(int64_t arg) struct bw_fixed bw_int_to_fixed_nonconst(int64_t value) { struct bw_fixed res; + ASSERT(value < BW_FIXED_MAX_I32 && value > BW_FIXED_MIN_I32); res.value = value << BW_FIXED_BITS_PER_FRACTIONAL_PART; return res; @@ -78,14 +78,12 @@ struct bw_fixed bw_frc_to_fixed(int64_t numerator, int64_t denominator) { uint32_t i = BW_FIXED_BITS_PER_FRACTIONAL_PART; - do - { + do { remainder <<= 
1; res_value <<= 1; - if (remainder >= arg2_value) - { + if (remainder >= arg2_value) { res_value |= 1; remainder -= arg2_value; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 3fab19134480d..d63b4209b14c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -26,7 +26,7 @@ #include "dc.h" #include "dc_link.h" #include "../display_mode_lib.h" -#include "dml/dcn30/display_mode_vba_30.h" +#include "../dcn30/display_mode_vba_30.h" #include "display_mode_vba_31.h" #include "../dml_inline_defs.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index 66b82e4f05c6e..35d10b4d018bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -27,7 +27,7 @@ #include "../display_mode_vba.h" #include "../dml_inline_defs.h" #include "display_rq_dlg_calc_31.h" -#include "dml/dcn30/display_mode_vba_30.h" +#include "../dcn30/display_mode_vba_30.h" static bool is_dual_plane(enum source_format_class source_format) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index c80307a6af1bf..34a5d0f87b5f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -189,7 +189,7 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p dc_assert_fp_enabled(); // Default clock levels are used for diags, which may lead to overclocking. - if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) { dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index a51e74344698e..0813f4fdb68c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -493,6 +493,7 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, phantom_stream->timing.v_front_porch + phantom_stream->timing.v_sync_width + phantom_bp; + phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing } /** diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c deleted file mode 100644 index b4b51e51fc25c..0000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c +++ /dev/null @@ -1,1884 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "resource.h" -#include "core_types.h" -#include "dsc.h" -#include "clk_mgr.h" - -#ifndef DC_LOGGER_INIT -#define DC_LOGGER_INIT -#undef DC_LOG_WARNING -#define DC_LOG_WARNING -#endif - -#define DML_WRAPPER_TRANSLATION_ -#include "dml_wrapper_translation.c" -#undef DML_WRAPPER_TRANSLATION_ - -static bool is_dual_plane(enum surface_pixel_format format) -{ - return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; -} - -static void build_clamping_params(struct dc_stream_state *stream) -{ - stream->clamping.clamping_level = CLAMPING_FULL_RANGE; - stream->clamping.c_depth = stream->timing.display_color_depth; - stream->clamping.pixel_encoding = stream->timing.pixel_encoding; -} - -static void get_pixel_clock_parameters( - const struct pipe_ctx *pipe_ctx, - struct pixel_clk_params *pixel_clk_params) -{ - const struct dc_stream_state *stream = pipe_ctx->stream; - - /*TODO: is this halved for YCbCr 420? in that case we might want to move - * the pixel clock normalization for hdmi up to here instead of doing it - * in pll_adjust_pix_clk - */ - pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; - pixel_clk_params->encoder_object_id = stream->link->link_enc->id; - pixel_clk_params->signal_type = pipe_ctx->stream->signal; - pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; - /* TODO: un-hardcode*/ - pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * - LINK_RATE_REF_FREQ_IN_KHZ; - pixel_clk_params->flags.ENABLE_SS = 0; - pixel_clk_params->color_depth = - stream->timing.display_color_depth; - pixel_clk_params->flags.DISPLAY_BLANKED = 1; - pixel_clk_params->flags.SUPPORT_YCBCR420 = (stream->timing.pixel_encoding == - PIXEL_ENCODING_YCBCR420); - pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { - pixel_clk_params->color_depth = COLOR_DEPTH_888; - } - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { - pixel_clk_params->requested_pix_clk_100hz = pixel_clk_params->requested_pix_clk_100hz / 2; - } - if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - pixel_clk_params->requested_pix_clk_100hz *= 2; - -} - -static void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream, - struct bit_depth_reduction_params *fmt_bit_depth) -{ - enum dc_dither_option option = stream->dither_option; - enum dc_pixel_encoding pixel_encoding = - stream->timing.pixel_encoding; - - memset(fmt_bit_depth, 0, sizeof(*fmt_bit_depth)); - - if (option == DITHER_OPTION_DEFAULT) { - switch (stream->timing.display_color_depth) { - case COLOR_DEPTH_666: - option = DITHER_OPTION_SPATIAL6; - break; - case COLOR_DEPTH_888: - option = DITHER_OPTION_SPATIAL8; - break; - case COLOR_DEPTH_101010: - option = DITHER_OPTION_SPATIAL10; - break; - default: - option = DITHER_OPTION_DISABLE; - } - } - - if (option == DITHER_OPTION_DISABLE) - return; - - if (option == DITHER_OPTION_TRUN6) { - 
fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 0; - } else if (option == DITHER_OPTION_TRUN8 || - option == DITHER_OPTION_TRUN8_SPATIAL6 || - option == DITHER_OPTION_TRUN8_FM6) { - fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 1; - } else if (option == DITHER_OPTION_TRUN10 || - option == DITHER_OPTION_TRUN10_SPATIAL6 || - option == DITHER_OPTION_TRUN10_SPATIAL8 || - option == DITHER_OPTION_TRUN10_FM8 || - option == DITHER_OPTION_TRUN10_FM6 || - option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { - fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; - } - - /* special case - Formatter can only reduce by 4 bits at most. - * When reducing from 12 to 6 bits, - * HW recommends we use trunc with round mode - * (if we did nothing, trunc to 10 bits would be used) - * note that any 12->10 bit reduction is ignored prior to DCE8, - * as the input was 10 bits. - */ - if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL6 || - option == DITHER_OPTION_FM6) { - fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; - fmt_bit_depth->flags.TRUNCATE_MODE = 1; - } - - /* spatial dither - * note that spatial modes 1-3 are never used - */ - if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL6 || - option == DITHER_OPTION_TRUN10_SPATIAL6 || - option == DITHER_OPTION_TRUN8_SPATIAL6) { - fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; - fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 0; - fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; - fmt_bit_depth->flags.RGB_RANDOM = - (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; - } else if (option == DITHER_OPTION_SPATIAL8_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL8 || - option == DITHER_OPTION_SPATIAL8_FM6 || - option == DITHER_OPTION_TRUN10_SPATIAL8 || - option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { - fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; - fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 1; - fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; - fmt_bit_depth->flags.RGB_RANDOM = - (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; - } else if (option == DITHER_OPTION_SPATIAL10_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL10 || - option == DITHER_OPTION_SPATIAL10_FM8 || - option == DITHER_OPTION_SPATIAL10_FM6) { - fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; - fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 2; - fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; - fmt_bit_depth->flags.RGB_RANDOM = - (pixel_encoding == PIXEL_ENCODING_RGB) ? 
1 : 0; - } - - if (option == DITHER_OPTION_SPATIAL6 || - option == DITHER_OPTION_SPATIAL8 || - option == DITHER_OPTION_SPATIAL10) { - fmt_bit_depth->flags.FRAME_RANDOM = 0; - } else { - fmt_bit_depth->flags.FRAME_RANDOM = 1; - } - - ////////////////////// - //// temporal dither - ////////////////////// - if (option == DITHER_OPTION_FM6 || - option == DITHER_OPTION_SPATIAL8_FM6 || - option == DITHER_OPTION_SPATIAL10_FM6 || - option == DITHER_OPTION_TRUN10_FM6 || - option == DITHER_OPTION_TRUN8_FM6 || - option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { - fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; - fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 0; - } else if (option == DITHER_OPTION_FM8 || - option == DITHER_OPTION_SPATIAL10_FM8 || - option == DITHER_OPTION_TRUN10_FM8) { - fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; - fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 1; - } else if (option == DITHER_OPTION_FM10) { - fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; - fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 2; - } - - fmt_bit_depth->pixel_encoding = pixel_encoding; -} - -/* Move this after the above function as VS complains about - * declaration issues for resource_build_bit_depth_reduction_params. - */ - -static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx) -{ - - get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); - - if (pipe_ctx->clock_source) - pipe_ctx->clock_source->funcs->get_pix_clk_dividers( - pipe_ctx->clock_source, - &pipe_ctx->stream_res.pix_clk_params, - &pipe_ctx->pll_settings); - - pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; - - resource_build_bit_depth_reduction_params(pipe_ctx->stream, - &pipe_ctx->stream->bit_depth_params); - build_clamping_params(pipe_ctx->stream); - - return DC_OK; -} - -bool dml_validate_dsc(struct dc *dc, struct dc_state *new_ctx) -{ - int i; - - /* Validate DSC config, dsc count validation is already done */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i]; - struct dc_stream_state *stream = pipe_ctx->stream; - struct dsc_config dsc_cfg; - struct pipe_ctx *odm_pipe; - int opp_cnt = 1; - - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) - opp_cnt++; - - /* Only need to validate top pipe */ - if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe || !stream || !stream->timing.flags.DSC) - continue; - - dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left - + stream->timing.h_border_right) / opp_cnt; - dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top - + stream->timing.v_border_bottom; - dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; - dsc_cfg.color_depth = stream->timing.display_color_depth; - dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? 
true : false; - dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; - dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; - - if (pipe_ctx->stream_res.dsc && !pipe_ctx->stream_res.dsc->funcs->dsc_validate_stream(pipe_ctx->stream_res.dsc, &dsc_cfg)) - return false; - } - return true; -} - -enum dc_status dml_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream) -{ - enum dc_status status = DC_OK; - struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream); - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - - - status = build_pipe_hw_param(pipe_ctx); - - return status; -} - -void dml_acquire_dsc(const struct dc *dc, - struct resource_context *res_ctx, - struct display_stream_compressor **dsc, - int pipe_idx) -{ - int i; - const struct resource_pool *pool = dc->res_pool; - struct display_stream_compressor *dsc_old = dc->current_state->res_ctx.pipe_ctx[pipe_idx].stream_res.dsc; - - ASSERT(*dsc == NULL); /* If this ASSERT fails, dsc was not released properly */ - *dsc = NULL; - - /* Always do 1-to-1 mapping when number of DSCs is same as number of pipes */ - if (pool->res_cap->num_dsc == pool->res_cap->num_opp) { - *dsc = pool->dscs[pipe_idx]; - res_ctx->is_dsc_acquired[pipe_idx] = true; - return; - } - - /* Return old DSC to avoid the need for redo it */ - if (dsc_old && !res_ctx->is_dsc_acquired[dsc_old->inst]) { - *dsc = dsc_old; - res_ctx->is_dsc_acquired[dsc_old->inst] = true; - return ; - } - - /* Find first free DSC */ - for (i = 0; i < pool->res_cap->num_dsc; i++) - if (!res_ctx->is_dsc_acquired[i]) { - *dsc = pool->dscs[i]; - res_ctx->is_dsc_acquired[i] = true; - break; - } -} - -static bool dml_split_stream_for_mpc_or_odm( - const struct dc *dc, - struct resource_context *res_ctx, - struct pipe_ctx *pri_pipe, - struct pipe_ctx *sec_pipe, - bool odm) -{ - int pipe_idx = sec_pipe->pipe_idx; - const struct resource_pool *pool = dc->res_pool; - - *sec_pipe = *pri_pipe; - - sec_pipe->pipe_idx = pipe_idx; - sec_pipe->plane_res.mi = pool->mis[pipe_idx]; - sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; - sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; - sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; - sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; - sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; - sec_pipe->stream_res.dsc = NULL; - if (odm) { - if (pri_pipe->next_odm_pipe) { - ASSERT(pri_pipe->next_odm_pipe != sec_pipe); - sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; - sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; - } - if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { - pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; - } - if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { - pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; - } - pri_pipe->next_odm_pipe = sec_pipe; - sec_pipe->prev_odm_pipe = pri_pipe; - ASSERT(sec_pipe->top_pipe == NULL); - - if (!sec_pipe->top_pipe) - sec_pipe->stream_res.opp = pool->opps[pipe_idx]; - else - sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; - if (sec_pipe->stream->timing.flags.DSC == 1) { - dml_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); - ASSERT(sec_pipe->stream_res.dsc); - if (sec_pipe->stream_res.dsc == NULL) - return false; - } - } else { - if (pri_pipe->bottom_pipe) { - ASSERT(pri_pipe->bottom_pipe != sec_pipe); - sec_pipe->bottom_pipe = 
pri_pipe->bottom_pipe; - sec_pipe->bottom_pipe->top_pipe = sec_pipe; - } - pri_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe; - - ASSERT(pri_pipe->plane_state); - } - - return true; -} - -static struct pipe_ctx *dml_find_split_pipe( - struct dc *dc, - struct dc_state *context, - int old_index) -{ - struct pipe_ctx *pipe = NULL; - int i; - - if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[old_index]; - pipe->pipe_idx = old_index; - } - - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL - && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - } - - /* - * May need to fix pipes getting tossed from 1 opp to another on flip - * Add for debugging transient underflow during topology updates: - * ASSERT(pipe); - */ - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - - return pipe; -} - -static void dml_release_dsc(struct resource_context *res_ctx, - const struct resource_pool *pool, - struct display_stream_compressor **dsc) -{ - int i; - - for (i = 0; i < pool->res_cap->num_dsc; i++) - if (pool->dscs[i] == *dsc) { - res_ctx->is_dsc_acquired[i] = false; - *dsc = NULL; - break; - } -} - -static int dml_get_num_mpc_splits(struct pipe_ctx *pipe) -{ - int mpc_split_count = 0; - struct pipe_ctx *other_pipe = pipe->bottom_pipe; - - while (other_pipe && other_pipe->plane_state == pipe->plane_state) { - mpc_split_count++; - other_pipe = other_pipe->bottom_pipe; - } - other_pipe = pipe->top_pipe; - while (other_pipe && other_pipe->plane_state == pipe->plane_state) { - mpc_split_count++; - other_pipe = other_pipe->top_pipe; - } - - return mpc_split_count; -} - -static bool dml_enough_pipes_for_subvp(struct dc *dc, - struct dc_state *context) -{ - int i = 0; - int num_pipes = 0; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->stream && pipe->plane_state) - num_pipes++; - } - - // Sub-VP only possible if the number of "real" pipes is - // less than or equal to half the number of available pipes - if (num_pipes * 2 > dc->res_pool->pipe_count) - return false; - - return true; -} - -static int dml_validate_apply_pipe_split_flags( - struct dc *dc, - struct dc_state *context, - int vlevel, - int *split, - bool *merge) -{ - int i, pipe_idx, vlevel_split; - int plane_count = 0; - bool force_split = false; - bool avoid_split = dc->debug.pipe_split_policy == MPC_SPLIT_AVOID; - struct vba_vars_st *v = &context->bw_ctx.dml.vba; - int max_mpc_comb = v->maxMpcComb; - - if (context->stream_count > 1) { - if (dc->debug.pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP) - avoid_split = true; - } else if (dc->debug.force_single_disp_pipe_split) - force_split = true; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - /** - * Workaround for avoiding pipe-split in cases where we'd split - * planes that are too small, resulting in splits that aren't - * valid for the scaler. 
- */ - if (pipe->plane_state && - (pipe->plane_state->dst_rect.width <= 16 || - pipe->plane_state->dst_rect.height <= 16 || - pipe->plane_state->src_rect.width <= 16 || - pipe->plane_state->src_rect.height <= 16)) - avoid_split = true; - - /* TODO: fix dc bugs and remove this split threshold thing */ - if (pipe->stream && !pipe->prev_odm_pipe && - (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) - ++plane_count; - } - if (plane_count > dc->res_pool->pipe_count / 2) - avoid_split = true; - - /* W/A: Mode timing with borders may not work well with pipe split, avoid for this corner case */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct dc_crtc_timing timing; - - if (!pipe->stream) - continue; - else { - timing = pipe->stream->timing; - if (timing.h_border_left + timing.h_border_right - + timing.v_border_top + timing.v_border_bottom > 0) { - avoid_split = true; - break; - } - } - } - - /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */ - if (avoid_split) { - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++) - if (v->NoOfDPP[vlevel][0][pipe_idx] == 1 && - v->ModeSupport[vlevel][0]) - break; - /* Impossible to not split this pipe */ - if (vlevel > context->bw_ctx.dml.soc.num_states) - vlevel = vlevel_split; - else - max_mpc_comb = 0; - pipe_idx++; - } - v->maxMpcComb = max_mpc_comb; - } - - /* Split loop sets which pipe should be split based on dml outputs and dc flags */ - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - int pipe_plane = v->pipe_plane[pipe_idx]; - bool split4mpc = context->stream_count == 1 && plane_count == 1 - && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4; - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - if (split4mpc || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 4) - split[i] = 4; - else if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 2) - split[i] = 2; - - if ((pipe->stream->view_format == - VIEW_3D_FORMAT_SIDE_BY_SIDE || - pipe->stream->view_format == - VIEW_3D_FORMAT_TOP_AND_BOTTOM) && - (pipe->stream->timing.timing_3d_format == - TIMING_3D_FORMAT_TOP_AND_BOTTOM || - pipe->stream->timing.timing_3d_format == - TIMING_3D_FORMAT_SIDE_BY_SIDE)) - split[i] = 2; - if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { - split[i] = 2; - v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1; - } - if (dc->debug.force_odm_combine_4to1 & (1 << pipe->stream_res.tg->inst)) { - split[i] = 4; - v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_4to1; - } - /*420 format workaround*/ - if (pipe->stream->timing.h_addressable > 7680 && - pipe->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { - split[i] = 4; - } - - v->ODMCombineEnabled[pipe_plane] = - v->ODMCombineEnablePerState[vlevel][pipe_plane]; - - if (v->ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) { - if (dml_get_num_mpc_splits(pipe) == 1) { - /*If need split for mpc but 2 way split already*/ - if (split[i] == 4) - split[i] = 2; /* 2 -> 4 MPC */ - else if (split[i] == 2) - split[i] = 0; /* 2 -> 2 MPC */ - else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) - merge[i] = true; /* 2 -> 1 MPC */ - } else if 
(dml_get_num_mpc_splits(pipe) == 3) { - /*If need split for mpc but 4 way split already*/ - if (split[i] == 2 && ((pipe->top_pipe && !pipe->top_pipe->top_pipe) - || !pipe->bottom_pipe)) { - merge[i] = true; /* 4 -> 2 MPC */ - } else if (split[i] == 0 && pipe->top_pipe && - pipe->top_pipe->plane_state == pipe->plane_state) - merge[i] = true; /* 4 -> 1 MPC */ - split[i] = 0; - } else if (dml_get_num_mpc_splits(pipe)) { - /* ODM -> MPC transition */ - if (pipe->prev_odm_pipe) { - split[i] = 0; - merge[i] = true; - } - } - } else { - if (dml_get_num_mpc_splits(pipe) == 1) { - /*If need split for odm but 2 way split already*/ - if (split[i] == 4) - split[i] = 2; /* 2 -> 4 ODM */ - else if (split[i] == 2) - split[i] = 0; /* 2 -> 2 ODM */ - else if (pipe->prev_odm_pipe) { - ASSERT(0); /* NOT expected yet */ - merge[i] = true; /* exit ODM */ - } - } else if (dml_get_num_mpc_splits(pipe) == 3) { - /*If need split for odm but 4 way split already*/ - if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe) - || !pipe->next_odm_pipe)) { - ASSERT(0); /* NOT expected yet */ - merge[i] = true; /* 4 -> 2 ODM */ - } else if (split[i] == 0 && pipe->prev_odm_pipe) { - ASSERT(0); /* NOT expected yet */ - merge[i] = true; /* exit ODM */ - } - split[i] = 0; - } else if (dml_get_num_mpc_splits(pipe)) { - /* MPC -> ODM transition */ - ASSERT(0); /* NOT expected yet */ - if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - split[i] = 0; - merge[i] = true; - } - } - } - - /* Adjust dppclk when split is forced, do not bother with dispclk */ - if (split[i] != 0 && v->NoOfDPP[vlevel][max_mpc_comb][pipe_idx] == 1) - v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2; - pipe_idx++; - } - - return vlevel; -} - -static void dml_set_phantom_stream_timing(struct dc *dc, - struct dc_state *context, - struct pipe_ctx *ref_pipe, - struct dc_stream_state *phantom_stream) -{ - // phantom_vactive = blackout (latency + margin) + fw_processing_delays + pstate allow width - uint32_t phantom_vactive_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us + 60 + - dc->caps.subvp_fw_processing_delay_us + - dc->caps.subvp_pstate_allow_width_us; - uint32_t phantom_vactive = ((double)phantom_vactive_us/1000000) * - (ref_pipe->stream->timing.pix_clk_100hz * 100) / - (double)ref_pipe->stream->timing.h_total; - uint32_t phantom_bp = ref_pipe->pipe_dlg_param.vstartup_start; - - phantom_stream->dst.y = 0; - phantom_stream->dst.height = phantom_vactive; - phantom_stream->src.y = 0; - phantom_stream->src.height = phantom_vactive; - - phantom_stream->timing.v_addressable = phantom_vactive; - phantom_stream->timing.v_front_porch = 1; - phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + - phantom_stream->timing.v_front_porch + - phantom_stream->timing.v_sync_width + - phantom_bp; -} - -static struct dc_stream_state *dml_enable_phantom_stream(struct dc *dc, - struct dc_state *context, - struct pipe_ctx *ref_pipe) -{ - struct dc_stream_state *phantom_stream = NULL; - - phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink); - phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; - phantom_stream->dpms_off = true; - phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; - phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream; - ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN; - ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream; - - /* stream has limited viewport and small timing */ - memcpy(&phantom_stream->timing, 
&ref_pipe->stream->timing, sizeof(phantom_stream->timing)); - memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); - memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); - dml_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream); - - dc_add_stream_to_ctx(dc, context, phantom_stream); - dc->hwss.apply_ctx_to_hw(dc, context); - return phantom_stream; -} - -static void dml_enable_phantom_plane(struct dc *dc, - struct dc_state *context, - struct dc_stream_state *phantom_stream, - struct pipe_ctx *main_pipe) -{ - struct dc_plane_state *phantom_plane = NULL; - struct dc_plane_state *prev_phantom_plane = NULL; - struct pipe_ctx *curr_pipe = main_pipe; - - while (curr_pipe) { - if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) - phantom_plane = prev_phantom_plane; - else - phantom_plane = dc_create_plane_state(dc); - - memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); - memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, - sizeof(phantom_plane->scaling_quality)); - memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect)); - memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect)); - memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect)); - memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size, - sizeof(phantom_plane->plane_size)); - memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info, - sizeof(phantom_plane->tiling_info)); - memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc)); - /* Currently compat_level is undefined in dc_state - * phantom_plane->compat_level = curr_pipe->plane_state->compat_level; - */ - phantom_plane->format = curr_pipe->plane_state->format; - phantom_plane->rotation = curr_pipe->plane_state->rotation; - phantom_plane->visible = curr_pipe->plane_state->visible; - - /* Shadow pipe has small viewport. 
*/ - phantom_plane->clip_rect.y = 0; - phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable; - - dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context); - - curr_pipe = curr_pipe->bottom_pipe; - prev_phantom_plane = phantom_plane; - } -} - -static void dml_add_phantom_pipes(struct dc *dc, struct dc_state *context) -{ - int i = 0; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct dc_stream_state *ref_stream = pipe->stream; - // Only construct phantom stream for top pipes that have plane enabled - if (!pipe->top_pipe && pipe->plane_state && pipe->stream && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - struct dc_stream_state *phantom_stream = NULL; - - phantom_stream = dml_enable_phantom_stream(dc, context, pipe); - dml_enable_phantom_plane(dc, context, phantom_stream, pipe); - } - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state && pipe->stream && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - pipe->stream->use_dynamic_meta = false; - pipe->plane_state->flip_immediate = false; - if (!resource_build_scaling_params(pipe)) { - // Log / remove phantom pipes since failed to build scaling params - } - } - } -} - -static void dml_remove_phantom_pipes(struct dc *dc, struct dc_state *context) -{ - int i; - bool removed_pipe = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - // build scaling params for phantom pipes - if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - dc_rem_all_planes_for_stream(dc, pipe->stream, context); - dc_remove_stream_from_ctx(dc, context, pipe->stream); - removed_pipe = true; - } - - // Clear all phantom stream info - if (pipe->stream) { - pipe->stream->mall_stream_config.type = SUBVP_NONE; - pipe->stream->mall_stream_config.paired_stream = NULL; - } - } - if (removed_pipe) - dc->hwss.apply_ctx_to_hw(dc, context); -} - -/* - * If the input state contains no upstream planes for a particular pipe (i.e. 
only timing) - * we need to populate some "conservative" plane information as DML cannot handle "no planes" - */ -static void populate_default_plane_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_pipe_params_st *pipe) -{ - pipe->src.is_hsplit = pipe->dest.odm_combine != dm_odm_combine_mode_disabled; - pipe->src.source_scan = dm_horz; - pipe->src.sw_mode = dm_sw_4kb_s; - pipe->src.macro_tile_size = dm_64k_tile; - pipe->src.viewport_width = timing->h_addressable; - if (pipe->src.viewport_width > 1920) - pipe->src.viewport_width = 1920; - pipe->src.viewport_height = timing->v_addressable; - if (pipe->src.viewport_height > 1080) - pipe->src.viewport_height = 1080; - pipe->src.surface_height_y = pipe->src.viewport_height; - pipe->src.surface_width_y = pipe->src.viewport_width; - pipe->src.surface_height_c = pipe->src.viewport_height; - pipe->src.surface_width_c = pipe->src.viewport_width; - pipe->src.data_pitch = ((pipe->src.viewport_width + 255) / 256) * 256; - pipe->src.source_format = dm_444_32; - pipe->dest.recout_width = pipe->src.viewport_width; - pipe->dest.recout_height = pipe->src.viewport_height; - pipe->dest.full_recout_width = pipe->dest.recout_width; - pipe->dest.full_recout_height = pipe->dest.recout_height; - pipe->scale_ratio_depth.lb_depth = dm_lb_16; - pipe->scale_ratio_depth.hscl_ratio = 1.0; - pipe->scale_ratio_depth.vscl_ratio = 1.0; - pipe->scale_ratio_depth.scl_enable = 0; - pipe->scale_taps.htaps = 1; - pipe->scale_taps.vtaps = 1; - pipe->dest.vtotal_min = timing->v_total; - pipe->dest.vtotal_max = timing->v_total; - - if (pipe->dest.odm_combine == dm_odm_combine_mode_2to1) { - pipe->src.viewport_width /= 2; - pipe->dest.recout_width /= 2; - } else if (pipe->dest.odm_combine == dm_odm_combine_mode_4to1) { - pipe->src.viewport_width /= 4; - pipe->dest.recout_width /= 4; - } - - pipe->src.dcc = false; - pipe->src.dcc_rate = 1; -} - -/* - * If the pipe is not blending (i.e. 
pipe_ctx->top pipe == null) then its - * hsplit group is equal to its own pipe ID - * Otherwise, all pipes part of the same blending tree have the same hsplit group - * ID as the top most pipe - * - * If the pipe ctx is ODM combined, then similar logic follows - */ -static void populate_hsplit_group_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe) -{ - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; - - if (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state - == dc_pipe_ctx->plane_state) { - struct pipe_ctx *first_pipe = dc_pipe_ctx->top_pipe; - int split_idx = 0; - - while (first_pipe->top_pipe && first_pipe->top_pipe->plane_state - == dc_pipe_ctx->plane_state) { - first_pipe = first_pipe->top_pipe; - split_idx++; - } - - /* Treat 4to1 mpc combine as an mpo of 2 2-to-1 combines */ - if (split_idx == 0) - e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx; - else if (split_idx == 1) - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; - else if (split_idx == 2) - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->top_pipe->pipe_idx; - - } else if (dc_pipe_ctx->prev_odm_pipe) { - struct pipe_ctx *first_pipe = dc_pipe_ctx->prev_odm_pipe; - - while (first_pipe->prev_odm_pipe) - first_pipe = first_pipe->prev_odm_pipe; - e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx; - } -} - -static void populate_dml_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe, int always_scale) -{ - const struct dc_plane_state *pln = dc_pipe_ctx->plane_state; - const struct scaler_data *scl = &dc_pipe_ctx->plane_res.scl_data; - - e2e_pipe->pipe.src.immediate_flip = pln->flip_immediate; - e2e_pipe->pipe.src.is_hsplit = (dc_pipe_ctx->bottom_pipe && dc_pipe_ctx->bottom_pipe->plane_state == pln) - || (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state == pln) - || e2e_pipe->pipe.dest.odm_combine != dm_odm_combine_mode_disabled; - - /* stereo is not split */ - if (pln->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE || - pln->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) { - e2e_pipe->pipe.src.is_hsplit = false; - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; - } - - e2e_pipe->pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90 - || pln->rotation == ROTATION_ANGLE_270 ? 
dm_vert : dm_horz; - e2e_pipe->pipe.src.viewport_y_y = scl->viewport.y; - e2e_pipe->pipe.src.viewport_y_c = scl->viewport_c.y; - e2e_pipe->pipe.src.viewport_width = scl->viewport.width; - e2e_pipe->pipe.src.viewport_width_c = scl->viewport_c.width; - e2e_pipe->pipe.src.viewport_height = scl->viewport.height; - e2e_pipe->pipe.src.viewport_height_c = scl->viewport_c.height; - e2e_pipe->pipe.src.viewport_width_max = pln->src_rect.width; - e2e_pipe->pipe.src.viewport_height_max = pln->src_rect.height; - e2e_pipe->pipe.src.surface_width_y = pln->plane_size.surface_size.width; - e2e_pipe->pipe.src.surface_height_y = pln->plane_size.surface_size.height; - e2e_pipe->pipe.src.surface_width_c = pln->plane_size.chroma_size.width; - e2e_pipe->pipe.src.surface_height_c = pln->plane_size.chroma_size.height; - - if (pln->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA - || pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { - e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch; - e2e_pipe->pipe.src.data_pitch_c = pln->plane_size.chroma_pitch; - e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch; - e2e_pipe->pipe.src.meta_pitch_c = pln->dcc.meta_pitch_c; - } else { - e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch; - e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch; - } - e2e_pipe->pipe.src.dcc = pln->dcc.enable; - e2e_pipe->pipe.src.dcc_rate = 1; - e2e_pipe->pipe.dest.recout_width = scl->recout.width; - e2e_pipe->pipe.dest.recout_height = scl->recout.height; - e2e_pipe->pipe.dest.full_recout_height = scl->recout.height; - e2e_pipe->pipe.dest.full_recout_width = scl->recout.width; - if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_2to1) - e2e_pipe->pipe.dest.full_recout_width *= 2; - else if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_4to1) - e2e_pipe->pipe.dest.full_recout_width *= 4; - else { - struct pipe_ctx *split_pipe = dc_pipe_ctx->bottom_pipe; - - while (split_pipe && split_pipe->plane_state == pln) { - e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; - split_pipe = split_pipe->bottom_pipe; - } - split_pipe = dc_pipe_ctx->top_pipe; - while (split_pipe && split_pipe->plane_state == pln) { - e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; - split_pipe = split_pipe->top_pipe; - } - } - - e2e_pipe->pipe.scale_ratio_depth.lb_depth = dm_lb_16; - e2e_pipe->pipe.scale_ratio_depth.hscl_ratio = (double) scl->ratios.horz.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.hscl_ratio_c = (double) scl->ratios.horz_c.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.vscl_ratio = (double) scl->ratios.vert.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.vscl_ratio_c = (double) scl->ratios.vert_c.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.scl_enable = - scl->ratios.vert.value != dc_fixpt_one.value - || scl->ratios.horz.value != dc_fixpt_one.value - || scl->ratios.vert_c.value != dc_fixpt_one.value - || scl->ratios.horz_c.value != dc_fixpt_one.value /*Lb only or Full scl*/ - || always_scale; /*support always scale*/ - e2e_pipe->pipe.scale_taps.htaps = scl->taps.h_taps; - e2e_pipe->pipe.scale_taps.htaps_c = scl->taps.h_taps_c; - e2e_pipe->pipe.scale_taps.vtaps = scl->taps.v_taps; - e2e_pipe->pipe.scale_taps.vtaps_c = scl->taps.v_taps_c; - - /* Currently compat_level is not defined. 
Commenting it until further resolution - * if (pln->compat_level == DC_LEGACY_TILING_ADDR_GEN_TWO) { - swizzle_to_dml_params(pln->tiling_info.gfx9.swizzle, - &e2e_pipe->pipe.src.sw_mode); - e2e_pipe->pipe.src.macro_tile_size = - swizzle_mode_to_macro_tile_size(pln->tiling_info.gfx9.swizzle); - } else { - gfx10array_mode_to_dml_params(pln->tiling_info.gfx10compatible.array_mode, - pln->compat_level, - &e2e_pipe->pipe.src.sw_mode); - e2e_pipe->pipe.src.macro_tile_size = dm_4k_tile; - }*/ - - e2e_pipe->pipe.src.source_format = dc_source_format_to_dml_source_format(pln->format); -} - -static void populate_dml_cursor_parameters_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe) -{ - /* - * For graphic plane, cursor number is 1, nv12 is 0 - * bw calculations due to cursor on/off - */ - if (dc_pipe_ctx->plane_state && - (dc_pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || - dc_pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM)) - e2e_pipe->pipe.src.num_cursors = 0; - else - e2e_pipe->pipe.src.num_cursors = 1; - - e2e_pipe->pipe.src.cur0_src_width = 256; - e2e_pipe->pipe.src.cur0_bpp = dm_cur_32bit; -} - -static int populate_dml_pipes_from_context_base( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int pipe_cnt, i; - bool synchronized_vblank = true; - struct resource_context *res_ctx = &context->res_ctx; - - for (i = 0, pipe_cnt = -1; i < dc->res_pool->pipe_count; i++) { - if (!res_ctx->pipe_ctx[i].stream) - continue; - - if (pipe_cnt < 0) { - pipe_cnt = i; - continue; - } - - if (res_ctx->pipe_ctx[pipe_cnt].stream == res_ctx->pipe_ctx[i].stream) - continue; - - if (dc->debug.disable_timing_sync || - (!resource_are_streams_timing_synchronizable( - res_ctx->pipe_ctx[pipe_cnt].stream, - res_ctx->pipe_ctx[i].stream) && - !resource_are_vblanks_synchronizable( - res_ctx->pipe_ctx[pipe_cnt].stream, - res_ctx->pipe_ctx[i].stream))) { - synchronized_vblank = false; - break; - } - } - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing; - - struct audio_check aud_check = {0}; - if (!res_ctx->pipe_ctx[i].stream) - continue; - - /* todo: - pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0; - pipes[pipe_cnt].pipe.src.dcc = 0; - pipes[pipe_cnt].pipe.src.vm = 0;*/ - - pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; - - pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC; - /* todo: rotation?*/ - pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h; - if (res_ctx->pipe_ctx[i].stream->use_dynamic_meta) { - pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = true; - /* 1/2 vblank */ - pipes[pipe_cnt].pipe.src.dynamic_metadata_lines_before_active = - (timing->v_total - timing->v_addressable - - timing->v_border_top - timing->v_border_bottom) / 2; - /* 36 bytes dp, 32 hdmi */ - pipes[pipe_cnt].pipe.src.dynamic_metadata_xmit_bytes = - dc_is_dp_signal(res_ctx->pipe_ctx[i].stream->signal) ? 
36 : 32; - } - pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank; - - dc_timing_to_dml_timing(timing, &pipes[pipe_cnt].pipe.dest); - pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min; - pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max; - - pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst; - - pipes[pipe_cnt].pipe.dest.odm_combine = get_dml_odm_combine(&res_ctx->pipe_ctx[i]); - - populate_hsplit_group_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]); - - pipes[pipe_cnt].dout.dp_lanes = 4; - pipes[pipe_cnt].dout.is_virtual = 0; - pipes[pipe_cnt].dout.output_type = get_dml_output_type(res_ctx->pipe_ctx[i].stream->signal); - if (pipes[pipe_cnt].dout.output_type < 0) { - pipes[pipe_cnt].dout.output_type = dm_dp; - pipes[pipe_cnt].dout.is_virtual = 1; - } - - populate_color_depth_and_encoding_from_timing(&res_ctx->pipe_ctx[i].stream->timing, &pipes[pipe_cnt].dout); - - if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC) - pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0; - - /* todo: default max for now, until there is logic reflecting this in dc*/ - pipes[pipe_cnt].dout.dsc_input_bpc = 12; - /*fill up the audio sample rate (unit in kHz)*/ - get_audio_check(&res_ctx->pipe_ctx[i].stream->audio_info, &aud_check); - pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate / 1000; - - populate_dml_cursor_parameters_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]); - - if (!res_ctx->pipe_ctx[i].plane_state) { - populate_default_plane_from_timing(timing, &pipes[pipe_cnt].pipe); - } else { - populate_dml_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt], dc->debug.always_scale); - } - - pipe_cnt++; - } - - /* populate writeback information */ - if (dc->res_pool) - dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes); - - return pipe_cnt; -} - -static int dml_populate_dml_pipes_from_context( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int i, pipe_cnt; - struct resource_context *res_ctx = &context->res_ctx; - struct pipe_ctx *pipe = NULL; // Fix potentially uninitialized error from VS - - populate_dml_pipes_from_context_base(dc, context, pipes, fast_validate); - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing; - - if (!res_ctx->pipe_ctx[i].stream) - continue; - pipe = &res_ctx->pipe_ctx[i]; - timing = &pipe->stream->timing; - - pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; - pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - - pipes[pipe_cnt].dout.dsc_input_bpc = 0; - if (pipes[pipe_cnt].dout.dsc_enable) { - switch (timing->display_color_depth) { - case COLOR_DEPTH_888: - pipes[pipe_cnt].dout.dsc_input_bpc = 8; - break; - case COLOR_DEPTH_101010: - pipes[pipe_cnt].dout.dsc_input_bpc = 10; - break; - case COLOR_DEPTH_121212: - pipes[pipe_cnt].dout.dsc_input_bpc = 12; - break; - default: - ASSERT(0); - break; - } - } - pipe_cnt++; - } - dc->config.enable_4to1MPC = false; - if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { - if (is_dual_plane(pipe->plane_state->format) - && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { - dc->config.enable_4to1MPC = true; - } 
else if (!is_dual_plane(pipe->plane_state->format)) { - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; - pipes[0].pipe.src.unbounded_req_mode = true; - } - } - - return pipe_cnt; -} - -static void dml_full_validate_bw_helper(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *vlevel, - int *split, - bool *merge, - int *pipe_cnt) -{ - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - /* - * DML favors voltage over p-state, but we're more interested in - * supporting p-state over voltage. We can't support p-state in - * prefetch mode > 0 so try capping the prefetch mode to start. - */ - context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = - dm_allow_self_refresh_and_mclk_switch; - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - /* This may adjust vlevel and maxMpcComb */ - if (*vlevel < context->bw_ctx.dml.soc.num_states) - *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - - /* Conditions for setting up phantom pipes for SubVP: - * 1. Not force disable SubVP - * 2. Full update (i.e. !fast_validate) - * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) - * 4. Display configuration passes validation - * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) - */ - if (!dc->debug.force_disable_subvp && - dml_enough_pipes_for_subvp(dc, context) && - *vlevel < context->bw_ctx.dml.soc.num_states && - (vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || - dc->debug.force_subvp_mclk_switch)) { - - dml_add_phantom_pipes(dc, context); - - /* Create input to DML based on new context which includes phantom pipes - * TODO: Input to DML should mark which pipes are phantom - */ - *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false); - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - if (*vlevel < context->bw_ctx.dml.soc.num_states) { - memset(split, 0, MAX_PIPES * sizeof(*split)); - memset(merge, 0, MAX_PIPES * sizeof(*merge)); - *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - } - - // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) - // remove phantom pipes and repopulate dml pipes - if (*vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - dml_remove_phantom_pipes(dc, context); - *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false); - } - } -} - -static void dcn20_adjust_adaptive_sync_v_startup( - const struct dc_crtc_timing *dc_crtc_timing, int *vstartup_start) -{ - struct dc_crtc_timing patched_crtc_timing; - uint32_t asic_blank_end = 0; - uint32_t asic_blank_start = 0; - uint32_t newVstartup = 0; - - patched_crtc_timing = *dc_crtc_timing; - - if (patched_crtc_timing.flags.INTERLACE == 1) { - if (patched_crtc_timing.v_front_porch < 2) - patched_crtc_timing.v_front_porch = 2; - } else { - if (patched_crtc_timing.v_front_porch < 1) - patched_crtc_timing.v_front_porch = 1; - } - - /* blank_start = frame end - front porch */ - asic_blank_start = patched_crtc_timing.v_total - - patched_crtc_timing.v_front_porch; - - /* blank_end = blank_start - active */ - asic_blank_end = asic_blank_start - - patched_crtc_timing.v_border_bottom - - patched_crtc_timing.v_addressable - - patched_crtc_timing.v_border_top; - - newVstartup = 
asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start); - - *vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start); -} - -static bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx) -{ - return (pipe_ctx->stream_res.hpo_dp_stream_enc && - pipe_ctx->link_res.hpo_dp_link_enc && - dc_is_dp_signal(pipe_ctx->stream->signal)); -} - -static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) -{ - int i; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) - return true; - } - return false; -} - -static void dml_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) -{ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; - } -} - -static bool dml_internal_validate( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *vlevel_out, - bool fast_validate) -{ - bool out = false; - bool repopulate_pipes = false; - int split[MAX_PIPES] = { 0 }; - bool merge[MAX_PIPES] = { false }; - bool newly_split[MAX_PIPES] = { false }; - int pipe_cnt, i, pipe_idx, vlevel; - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - ASSERT(pipes); - if (!pipes) - return false; - - // For each full update, remove all existing phantom pipes first - dml_remove_phantom_pipes(dc, context); - - dml_update_soc_for_wm_a(dc, context); - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state) { - // On initial pass through DML, we intend to use MALL for SS on all - // (non-PSR) surfaces with none using MALL for P-State - // 'mall_plane_config': is not a member of 'dc_plane_state' - commenting it out till mall_plane_config gets supported in dc_plant_state - //if (pipe->stream && pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) - // pipe->plane_state->mall_plane_config.use_mall_for_ss = true; - } - } - pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - - if (!pipe_cnt) { - out = true; - goto validate_out; - } - - dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (!fast_validate) { - dml_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); - } - - if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - /* - * If mode is unsupported or there's still no p-state support then - * fall back to favoring voltage. - * - * We don't actually support prefetch mode 2, so require that we - * at least support prefetch mode 1. 
- */ - context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = - dm_allow_self_refresh; - - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - if (vlevel < context->bw_ctx.dml.soc.num_states) { - memset(split, 0, sizeof(split)); - memset(merge, 0, sizeof(merge)); - vlevel = dml_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); - } - } - - dml_log_mode_support_params(&context->bw_ctx.dml); - - if (vlevel == context->bw_ctx.dml.soc.num_states) - goto validate_fail; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; - - if (!pipe->stream) - continue; - - /* We only support full screen mpo with ODM */ - if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled - && pipe->plane_state && mpo_pipe - && memcmp(&mpo_pipe->plane_res.scl_data.recout, - &pipe->plane_res.scl_data.recout, - sizeof(struct rect)) != 0) { - ASSERT(mpo_pipe->plane_state != pipe->plane_state); - goto validate_fail; - } - pipe_idx++; - } - - /* merge pipes if necessary */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - /*skip pipes that don't need merging*/ - if (!merge[i]) - continue; - - /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ - if (pipe->prev_odm_pipe) { - /*split off odm pipe*/ - pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; - if (pipe->next_odm_pipe) - pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; - - pipe->bottom_pipe = NULL; - pipe->next_odm_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - pipe->top_pipe = NULL; - pipe->prev_odm_pipe = NULL; - if (pipe->stream_res.dsc) - dml_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - struct pipe_ctx *top_pipe = pipe->top_pipe; - struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; - - top_pipe->bottom_pipe = bottom_pipe; - if (bottom_pipe) - bottom_pipe->top_pipe = top_pipe; - - pipe->top_pipe = NULL; - pipe->bottom_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else - ASSERT(0); /* Should never try to merge master pipe */ - - } - - for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - struct pipe_ctx *hsplit_pipe = NULL; - bool odm; - int old_index = -1; - - if (!pipe->stream || newly_split[i]) - continue; - - pipe_idx++; - odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; - - if (!pipe->plane_state && !odm) - continue; - - if (split[i]) { - if (odm) { - if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - } else { - if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - 
old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else if (old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - } - hsplit_pipe = dml_find_split_pipe(dc, context, old_index); - ASSERT(hsplit_pipe); - if (!hsplit_pipe) - goto validate_fail; - - if (!dml_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, hsplit_pipe, odm)) - goto validate_fail; - - newly_split[hsplit_pipe->pipe_idx] = true; - repopulate_pipes = true; - } - if (split[i] == 4) { - struct pipe_ctx *pipe_4to1; - - if (odm && old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dml_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dml_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - - if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe - && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dml_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dml_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - hsplit_pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - } - if (odm) - dml_build_mapped_resource(dc, context, pipe->stream); - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state) { - if (!resource_build_scaling_params(pipe)) - goto validate_fail; - } - } - - /* Actual dsc count per stream dsc validation*/ - if (!dml_validate_dsc(dc, context)) { - vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; - goto validate_fail; - } - - if (repopulate_pipes) - pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - *vlevel_out = vlevel; - *pipe_cnt_out = pipe_cnt; - - out = true; - goto validate_out; - -validate_fail: - out = false; - -validate_out: - return out; -} - -static void dml_calculate_dlg_params( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx; - int plane_count; - - /* Writeback MCIF_WB arbitration parameters */ - if (dc->res_pool) - dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); - - context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; - context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; - context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; - context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; - context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; - context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 
1000; - context->bw_ctx.bw.dcn.clk.p_state_change_support = - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] - != dm_dram_clock_change_unsupported; - - context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - /* 'z9_support': is not a member of 'dc_clocks' - Commenting out till we have this support in dc_clocks - * context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ? - DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW; - */ - plane_count = 0; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].plane_state) - plane_count++; - } - - /* Commented out as per above error for now. - if (plane_count == 0) - context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW; - */ - context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); - context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = - context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) - context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { - // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; - context->res_ctx.pipe_ctx[i].unbounded_req = false; - } else { - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes; - context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode; - } - - if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = - pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; - pipe_idx++; - } - /*save a original dppclock copy*/ - context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; - context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; - context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz * 1000; - context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz * 1000; - context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - - context->bw_ctx.dml.ip.det_buffer_size_kbytes * pipe_idx; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - bool cstate_en = context->bw_ctx.dml.vba.PrefetchMode[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != 2; - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg(&context->bw_ctx.dml, - &context->res_ctx.pipe_ctx[i].dlg_regs, - 
&context->res_ctx.pipe_ctx[i].ttu_regs, - pipes, - pipe_cnt, - pipe_idx, - cstate_en, - context->bw_ctx.bw.dcn.clk.p_state_change_support, - false, false, true); - - context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml, - &context->res_ctx.pipe_ctx[i].rq_regs, - &pipes[pipe_idx].pipe); - pipe_idx++; - } -} - -static void dml_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx, vlevel_temp = 0; - - double dcfclk = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; - double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != - dm_dram_clock_change_unsupported; - - /* Set B: - * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, - * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark - * calculations to cover bootup clocks. - * DCFCLK: soc.clock_limits[2] when available - * UCLK: soc.clock_limits[2] when available - */ - if (context->bw_ctx.dml.soc.num_states > 2) { - vlevel_temp = 2; - dcfclk = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - //context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ - 
//context->bw_ctx.bw.dcn.watermarks.b.usr_retraining = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 8; - - /* Set D: - * All clocks min. - * DCFCLK: Min, as reported by PM FW when available - * UCLK : Min, as reported by PM FW when available - * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) - */ - - if (context->bw_ctx.dml.soc.num_states > 2) { - vlevel_temp = 0; - dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - //context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ - //context->bw_ctx.bw.dcn.watermarks.d.usr_retraining = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 8; - /* Set C, for Dummy P-State: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW, when available - * UCLK : Min, as reported by PM FW, when available - * pstate latency as per UCLK state dummy pstate latency - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - unsigned int min_dram_speed_mts_margin = 160; - - if ((!pstate_en)) - min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; - - /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ - for (i = 3; i > 0; i--) - if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) - break; - - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.dummy_pstate_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - //context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ - //context->bw_ctx.bw.dcn.watermarks.c.usr_retraining = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 8; - if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { - /* The only difference between A and C is p-state latency, if p-state is not supported - * with full p-state latency we want to calculate DLG based on dummy p-state latency, - * Set A p-state watermark set to 0 previously, when p-state unsupported, for now keep as previous implementation. - */ - context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; - } else { - /* Set A: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW, when available - * UCLK: Min, as reported by PM FW, when available - */ - dml_update_soc_for_wm_a(dc, context); - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - } - - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); - pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - if (dc->config.forced_clocks) { - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; - } - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; - - pipe_idx++; - } - - context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; - - dml_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); - - if (!pstate_en) - /* Restore full p-state latency */ - context->bw_ctx.dml.soc.dram_clock_change_latency_us = - dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; -} - -bool dml_validate(struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool out = false; - - BW_VAL_TRACE_SETUP(); - - int vlevel = 0; - int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = context->bw_ctx.dml.dml_pipe_state; - DC_LOGGER_INIT(dc->ctx->logger); - - BW_VAL_TRACE_COUNT(); - - out = dml_internal_validate(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); - - if (pipe_cnt == 0) - goto validate_out; - - if (!out) - goto validate_fail; - - BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - - if (fast_validate) { - BW_VAL_TRACE_SKIP(fast); - goto validate_out; - } - - dml_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); - - BW_VAL_TRACE_END_WATERMARKS(); - - goto validate_out; - -validate_fail: - DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", - 
dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); - - BW_VAL_TRACE_SKIP(fail); - out = false; - -validate_out: - BW_VAL_TRACE_FINISH(); - - return out; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c deleted file mode 100644 index 4ec5310a2962d..0000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifdef DML_WRAPPER_TRANSLATION_ - -static void gfx10array_mode_to_dml_params( - enum array_mode_values array_mode, - enum legacy_tiling_compat_level compat_level, - unsigned int *sw_mode) -{ - switch (array_mode) { - case DC_ARRAY_LINEAR_ALLIGNED: - case DC_ARRAY_LINEAR_GENERAL: - *sw_mode = dm_sw_linear; - break; - case DC_ARRAY_2D_TILED_THIN1: -// DC_LEGACY_TILING_ADDR_GEN_ZERO - undefined as per current code hence removed -#if 0 - if (compat_level == DC_LEGACY_TILING_ADDR_GEN_ZERO) - *sw_mode = dm_sw_gfx7_2d_thin_l_vp; - else - *sw_mode = dm_sw_gfx7_2d_thin_gl; -#endif - break; - default: - ASSERT(0); /* Not supported */ - break; - } -} - -static void swizzle_to_dml_params( - enum swizzle_mode_values swizzle, - unsigned int *sw_mode) -{ - switch (swizzle) { - case DC_SW_LINEAR: - *sw_mode = dm_sw_linear; - break; - case DC_SW_4KB_S: - *sw_mode = dm_sw_4kb_s; - break; - case DC_SW_4KB_S_X: - *sw_mode = dm_sw_4kb_s_x; - break; - case DC_SW_4KB_D: - *sw_mode = dm_sw_4kb_d; - break; - case DC_SW_4KB_D_X: - *sw_mode = dm_sw_4kb_d_x; - break; - case DC_SW_64KB_S: - *sw_mode = dm_sw_64kb_s; - break; - case DC_SW_64KB_S_X: - *sw_mode = dm_sw_64kb_s_x; - break; - case DC_SW_64KB_S_T: - *sw_mode = dm_sw_64kb_s_t; - break; - case DC_SW_64KB_D: - *sw_mode = dm_sw_64kb_d; - break; - case DC_SW_64KB_D_X: - *sw_mode = dm_sw_64kb_d_x; - break; - case DC_SW_64KB_D_T: - *sw_mode = dm_sw_64kb_d_t; - break; - case DC_SW_64KB_R_X: - *sw_mode = dm_sw_64kb_r_x; - break; - case DC_SW_VAR_S: - *sw_mode = dm_sw_var_s; - break; - case DC_SW_VAR_S_X: - *sw_mode = dm_sw_var_s_x; - break; - case DC_SW_VAR_D: - *sw_mode = dm_sw_var_d; - break; - case DC_SW_VAR_D_X: - *sw_mode = dm_sw_var_d_x; - break; - - default: - ASSERT(0); /* Not supported */ - break; - } -} - -static void dc_timing_to_dml_timing(const struct dc_crtc_timing *timing, struct 
_vcs_dpi_display_pipe_dest_params_st *dest) -{ - dest->hblank_start = timing->h_total - timing->h_front_porch; - dest->hblank_end = dest->hblank_start - - timing->h_addressable - - timing->h_border_left - - timing->h_border_right; - dest->vblank_start = timing->v_total - timing->v_front_porch; - dest->vblank_end = dest->vblank_start - - timing->v_addressable - - timing->v_border_top - - timing->v_border_bottom; - dest->htotal = timing->h_total; - dest->vtotal = timing->v_total; - dest->hactive = timing->h_addressable; - dest->vactive = timing->v_addressable; - dest->interlaced = timing->flags.INTERLACE; - dest->pixel_rate_mhz = timing->pix_clk_100hz/10000.0; - if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - dest->pixel_rate_mhz *= 2; -} - -static enum odm_combine_mode get_dml_odm_combine(const struct pipe_ctx *pipe) -{ - int odm_split_count = 0; - enum odm_combine_mode combine_mode = dm_odm_combine_mode_disabled; - struct pipe_ctx *next_pipe = pipe->next_odm_pipe; - - // Traverse pipe tree to determine odm split count - while (next_pipe) { - odm_split_count++; - next_pipe = next_pipe->next_odm_pipe; - } - pipe = pipe->prev_odm_pipe; - while (pipe) { - odm_split_count++; - pipe = pipe->prev_odm_pipe; - } - - // Translate split to DML odm combine factor - switch (odm_split_count) { - case 1: - combine_mode = dm_odm_combine_mode_2to1; - break; - case 3: - combine_mode = dm_odm_combine_mode_4to1; - break; - default: - combine_mode = dm_odm_combine_mode_disabled; - } - - return combine_mode; -} - -static int get_dml_output_type(enum signal_type dc_signal) -{ - int dml_output_type = -1; - - switch (dc_signal) { - case SIGNAL_TYPE_DISPLAY_PORT_MST: - case SIGNAL_TYPE_DISPLAY_PORT: - dml_output_type = dm_dp; - break; - case SIGNAL_TYPE_EDP: - dml_output_type = dm_edp; - break; - case SIGNAL_TYPE_HDMI_TYPE_A: - case SIGNAL_TYPE_DVI_SINGLE_LINK: - case SIGNAL_TYPE_DVI_DUAL_LINK: - dml_output_type = dm_hdmi; - break; - default: - break; - } - - return dml_output_type; -} - -static void populate_color_depth_and_encoding_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_output_params_st *dout) -{ - int output_bpc = 0; - - switch (timing->display_color_depth) { - case COLOR_DEPTH_666: - output_bpc = 6; - break; - case COLOR_DEPTH_888: - output_bpc = 8; - break; - case COLOR_DEPTH_101010: - output_bpc = 10; - break; - case COLOR_DEPTH_121212: - output_bpc = 12; - break; - case COLOR_DEPTH_141414: - output_bpc = 14; - break; - case COLOR_DEPTH_161616: - output_bpc = 16; - break; - case COLOR_DEPTH_999: - output_bpc = 9; - break; - case COLOR_DEPTH_111111: - output_bpc = 11; - break; - default: - output_bpc = 8; - break; - } - - switch (timing->pixel_encoding) { - case PIXEL_ENCODING_RGB: - case PIXEL_ENCODING_YCBCR444: - dout->output_format = dm_444; - dout->output_bpp = output_bpc * 3; - break; - case PIXEL_ENCODING_YCBCR420: - dout->output_format = dm_420; - dout->output_bpp = (output_bpc * 3.0) / 2; - break; - case PIXEL_ENCODING_YCBCR422: - if (timing->flags.DSC && !timing->dsc_cfg.ycbcr422_simple) - dout->output_format = dm_n422; - else - dout->output_format = dm_s422; - dout->output_bpp = output_bpc * 2; - break; - default: - dout->output_format = dm_444; - dout->output_bpp = output_bpc * 3; - } -} - -static enum source_format_class dc_source_format_to_dml_source_format(enum surface_pixel_format dc_format) -{ - enum source_format_class dml_format = dm_444_32; - - switch (dc_format) { - case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: - case 
SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: - dml_format = dm_420_8; - break; - case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: - case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: - dml_format = dm_420_10; - break; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: - dml_format = dm_444_64; - break; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: - case SURFACE_PIXEL_FORMAT_GRPH_RGB565: - dml_format = dm_444_16; - break; - case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: - dml_format = dm_444_8; - break; - case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: - dml_format = dm_rgbe_alpha; - break; - default: - dml_format = dm_444_32; - break; - } - - return dml_format; -} - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 7e43b4403b3e3..01d46f0c1ffe4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -312,8 +312,6 @@ struct timing_generator_funcs { int group_idx, uint32_t gsl_ready_signal); void (*set_out_mux)(struct timing_generator *tg, enum otg_out_mux_dest dest); - void (*set_vrr_m_const)(struct timing_generator *optc, - double vtotal_avg); void (*set_drr_trigger_window)(struct timing_generator *optc, uint32_t window_start, uint32_t window_end); void (*set_vtotal_change_limit)(struct timing_generator *optc, diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index e054f3494087f..9f3558c0ef110 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -247,10 +247,12 @@ enum { #define AMDGPU_FAMILY_GC_11_0_1 148 #define GC_11_0_0_A0 0x1 #define GC_11_0_2_A0 0x10 +#define GC_11_0_3_A0 0x20 #define GC_11_UNKNOWN 0xFF #define ASICREV_IS_GC_11_0_0(eChipRev) (eChipRev < GC_11_0_2_A0) -#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_UNKNOWN) +#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_0_3_A0) +#define ASICREV_IS_GC_11_0_3(eChipRev) (eChipRev >= GC_11_0_3_A0 && eChipRev < GC_11_UNKNOWN) /* * ASIC chip ID diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index 5d19a43189000..d89cf3e76c6dc 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -121,13 +121,15 @@ enum dc_log_type { LOG_DSC, #endif LOG_SMU_MSG, + LOG_DC2RESERVED4, + LOG_DC2RESERVED5, LOG_DWB, LOG_GAMMA_DEBUG, LOG_MAX_HW_POINTS, LOG_ALL_TF_CHANNELS, LOG_SAMPLE_1DLUT, LOG_DP2, - LOG_SECTION_TOTAL_COUNT + LOG_DC2RESERVED12, }; #define DC_MIN_LOG_MASK ((1 << LOG_ERROR) | \ diff --git a/drivers/gpu/drm/amd/dkms/config/config.h b/drivers/gpu/drm/amd/dkms/config/config.h index 58cffa4328dd7..28806304010c5 100644 --- a/drivers/gpu/drm/amd/dkms/config/config.h +++ b/drivers/gpu/drm/amd/dkms/config/config.h @@ -402,6 +402,9 @@ /* drm_dp_mst_{get,put}_port_malloc() is available */ #define HAVE_DRM_DP_MST_GET_PUT_PORT_MALLOC 1 +/* struct drm_dp_mst_port has passthrough_aux member */ +/* #undef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX */ + /* struct drm_dp_mst_topology_cbs->destroy_connector is available */ /* #undef HAVE_DRM_DP_MST_TOPOLOGY_CBS_DESTROY_CONNECTOR */ diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4 
b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4 new file mode 100644 index 0000000000000..a1f26ca53e149 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4 @@ -0,0 +1,23 @@ +dnl # +dnl # commit v5.18-2579-g3af4b1f1d6e7 +dnl # "drm/dp_mst: add passthrough_aux to struct drm_dp_mst_port" +dnl +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_POST_PASSTHROUGH_AUX], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #if defined(HAVE_DRM_DISPLAY_DRM_DP_MST_HELPER_H) + #include + #elif defined(HAVE_DRM_DP_DRM_DP_MST_HELPER_H) + #include + #else + #include + #endif + ], [ + struct drm_dp_mst_port *dp_mst_port = NULL; + dp_mst_port->passthrough_aux = NULL; + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX, 1, + [struct drm_dp_mst_port has passthrough_aux member]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/kernel.m4 b/drivers/gpu/drm/amd/dkms/m4/kernel.m4 index 3bcd090500e66..42ee5087cd7b1 100644 --- a/drivers/gpu/drm/amd/dkms/m4/kernel.m4 +++ b/drivers/gpu/drm/amd/dkms/m4/kernel.m4 @@ -282,6 +282,7 @@ AC_DEFUN([AC_CONFIG_KERNEL], [ AC_AMDGPU_DRM_GEM_OBJECT_FUNCS_VMAP_HAS_IOSYS_MAP_ARG AC_AMDGPU_DRM_FB_HELPER_BUFFER AC_AMDGPU_DRM_DP_MST_TOPOLOGY_MGR_BASE + AC_AMDGPU_DRM_DP_MST_POST_PASSTHROUGH_AUX AC_KERNEL_WAIT AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 956b6ce81c846..1b300c569faf5 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, return ret; } +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = 0; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_set_residency_gfxoff(smu, value); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = 0; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_get_residency_gfxoff(smu, value); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u64 *value) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = 0; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_get_entrycount_gfxoff(smu, value); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value) { struct smu_context *smu = adev->powerplay.pp_handle; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 65624d091ed2d..cb5b9df78b4db 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev, int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev); int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum smu_event_type event, uint64_t event_arg); +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value); +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value); +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u64 *value); int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value); uint64_t 
amdgpu_dpm_get_thermal_throttling_counter(struct amdgpu_device *adev); void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 7510d470b8643..55b7910b43852 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void *handle, return smu_set_pp_feature_mask(smu, new_mask); } +int smu_set_residency_gfxoff(struct smu_context *smu, bool value) +{ + if (!smu->ppt_funcs->set_gfx_off_residency) + return -EINVAL; + + return smu_set_gfx_off_residency(smu, value); +} + +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value) +{ + if (!smu->ppt_funcs->get_gfx_off_residency) + return -EINVAL; + + return smu_get_gfx_off_residency(smu, value); +} + +int smu_get_entrycount_gfxoff(struct smu_context *smu, u64 *value) +{ + if (!smu->ppt_funcs->get_gfx_off_entrycount) + return -EINVAL; + + return smu_get_gfx_off_entrycount(smu, value); +} + int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value) { if (!smu->ppt_funcs->get_gfx_off_status) @@ -1576,6 +1600,7 @@ static int smu_suspend(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = adev->powerplay.pp_handle; int ret; + uint64_t count; if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev)) return 0; @@ -1593,6 +1618,14 @@ static int smu_suspend(void *handle) smu_set_gfx_cgpg(smu, false); + /* + * pwfw resets entrycount when device is suspended, so we save the + * last value to be used when we resume to keep it consistent + */ + ret = smu_get_entrycount_gfxoff(smu, &count); + if (!ret) + adev->gfx.gfx_off_entrycount = count; + return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index b81c657c73860..e2fa3b066b968 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1111,6 +1111,22 @@ struct pptable_funcs { */ uint32_t (*get_gfx_off_status)(struct smu_context *smu); + /** + * @gfx_off_entrycount: total GFXOFF entry count at the time of + * query since system power-up + */ + u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint64_t *entrycount); + + /** + * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging + */ + u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start); + + /** + * @get_gfx_off_residency: Average GFXOFF residency % during the logging interval + */ + u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t *residency); + /** * @register_irq_handler: Register interupt request handlers. 
*/ @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu); int smu_allow_xgmi_power_down(struct smu_context *smu, bool en); +int smu_get_entrycount_gfxoff(struct smu_context *smu, u64 *value); + +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value); + +int smu_set_residency_gfxoff(struct smu_context *smu, bool value); + int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value); int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h index fe130a497d6c3..7471e2df28285 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h @@ -108,7 +108,10 @@ #define PPSMC_MSG_SetSlowPPTLimit 0x4A #define PPSMC_MSG_GetFastPPTLimit 0x4B #define PPSMC_MSG_GetSlowPPTLimit 0x4C -#define PPSMC_Message_Count 0x4D +#define PPSMC_MSG_GetGfxOffStatus 0x50 +#define PPSMC_MSG_GetGfxOffEntryCount 0x51 +#define PPSMC_MSG_LogGfxOffResidency 0x52 +#define PPSMC_Message_Count 0x53 //Argument for PPSMC_MSG_GfxDeviceDriverReset enum { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 28f6a1eb6945c..58098b82df660 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -236,7 +236,10 @@ __SMU_DUMMY_MAP(HeavySBR), \ __SMU_DUMMY_MAP(SetBadHBMPagesRetiredFlagsPerChannel), \ __SMU_DUMMY_MAP(EnableGfxImu), \ - __SMU_DUMMY_MAP(DriverMode2Reset), + __SMU_DUMMY_MAP(DriverMode2Reset), \ + __SMU_DUMMY_MAP(GetGfxOffStatus), \ + __SMU_DUMMY_MAP(GetGfxOffEntryCount), \ + __SMU_DUMMY_MAP(LogGfxOffResidency), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index c02e5e5767282..6fe2fe92ebd75 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -28,7 +28,7 @@ #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2C #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 89504ff8e9ed7..847990145dcd9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -138,6 +138,9 @@ static struct cmn2asic_msg_mapping vangogh_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(SetSlowPPTLimit, PPSMC_MSG_SetSlowPPTLimit, 0), MSG_MAP(GetFastPPTLimit, PPSMC_MSG_GetFastPPTLimit, 0), MSG_MAP(GetSlowPPTLimit, PPSMC_MSG_GetSlowPPTLimit, 0), + MSG_MAP(GetGfxOffStatus, PPSMC_MSG_GetGfxOffStatus, 0), + MSG_MAP(GetGfxOffEntryCount, PPSMC_MSG_GetGfxOffEntryCount, 0), + MSG_MAP(LogGfxOffResidency, PPSMC_MSG_LogGfxOffResidency, 0), }; static struct cmn2asic_mapping vangogh_feature_mask_map[SMU_FEATURE_COUNT] = { @@ -2200,6 +2203,76 @@ static int vangogh_set_power_limit(struct smu_context *smu, return ret; } +/** + * vangogh_set_gfxoff_residency + * + * @smu: amdgpu_device pointer + * @start: start/stop residency log + * + * This function will be used to log gfxoff residency + * + 
* + * Returns standard response codes. + */ +static u32 vangogh_set_gfxoff_residency(struct smu_context *smu, bool start) +{ + int ret = 0; + u32 residency; + struct amdgpu_device *adev = smu->adev; + + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_LogGfxOffResidency, + start, &residency); + + if (!start) + adev->gfx.gfx_off_residency = residency; + + return ret; +} + +/** + * vangogh_get_gfxoff_residency + * + * @smu: amdgpu_device pointer + * + * This function will be used to get gfxoff residency. + * + * Returns standard response codes. + */ +static u32 vangogh_get_gfxoff_residency(struct smu_context *smu, uint32_t *residency) +{ + struct amdgpu_device *adev = smu->adev; + + *residency = adev->gfx.gfx_off_residency; + + return 0; +} + +/** + * vangogh_get_gfxoff_entrycount - get gfxoff entry count + * + * @smu: amdgpu_device pointer + * + * This function will be used to get gfxoff entry count + * + * Returns standard response codes. + */ +static u32 vangogh_get_gfxoff_entrycount(struct smu_context *smu, uint64_t *entrycount) +{ + int ret = 0, value = 0; + struct amdgpu_device *adev = smu->adev; + + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return 0; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetGfxOffEntryCount, &value); + *entrycount = value + adev->gfx.gfx_off_entrycount; + + return ret; +} + static const struct pptable_funcs vangogh_ppt_funcs = { .check_fw_status = smu_v11_0_check_fw_status, @@ -2237,6 +2310,9 @@ static const struct pptable_funcs vangogh_ppt_funcs = { .mode2_reset = vangogh_mode2_reset, .gfx_off_control = smu_v11_0_gfx_off_control, .get_gfx_off_status = vangogh_get_gfxoff_status, + .get_gfx_off_entrycount = vangogh_get_gfxoff_entrycount, + .get_gfx_off_residency = vangogh_get_gfxoff_residency, + .set_gfx_off_residency = vangogh_set_gfxoff_residency, .get_ppt_limit = vangogh_get_ppt_limit, .get_power_limit = vangogh_get_power_limit, .set_power_limit = vangogh_set_power_limit, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index e56ec06012ddc..3651f6f750680 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2345,8 +2345,8 @@ int smu_v13_0_set_gfx_power_up_by_imu(struct smu_context *smu) index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG, SMU_MSG_EnableGfxImu); - - return smu_cmn_send_msg_without_waiting(smu, index, 0); + /* Param 1 to tell PMFW to enable GFXOFF feature */ + return smu_cmn_send_msg_without_waiting(smu, index, 1); } int smu_v13_0_od_edit_dpm_table(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h index 7469bbfce1fb0..ceb13c8380679 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h @@ -47,6 +47,9 @@ #define smu_notify_memory_pool_location(smu) smu_ppt_funcs(notify_memory_pool_location, 0, smu) #define smu_gfx_off_control(smu, enable) smu_ppt_funcs(gfx_off_control, 0, smu, enable) #define smu_get_gfx_off_status(smu) smu_ppt_funcs(get_gfx_off_status, 0, smu) +#define smu_get_gfx_off_entrycount(smu, value) smu_ppt_funcs(get_gfx_off_entrycount, 0, smu, value) +#define smu_get_gfx_off_residency(smu, value) smu_ppt_funcs(get_gfx_off_residency, 0, smu, value) +#define smu_set_gfx_off_residency(smu, value) smu_ppt_funcs(set_gfx_off_residency, 0, smu, value) #define smu_set_last_dcef_min_deep_sleep_clk(smu) 
smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu) #define smu_system_features_control(smu, en) smu_ppt_funcs(system_features_control, 0, smu, en) #define smu_init_max_sustainable_clocks(smu) smu_ppt_funcs(init_max_sustainable_clocks, 0, smu) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 22dcdef082cc5..1197ecfc4e8c8 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -80,7 +80,7 @@ static int ttm_global_init(void) si_meminfo(&si); - ttm_debugfs_root = debugfs_create_dir("ttm", NULL); + ttm_debugfs_root = debugfs_create_dir(TTM_NAME, NULL); if (IS_ERR(ttm_debugfs_root)) { ttm_debugfs_root = NULL; } diff --git a/include/kcl/kcl_cpumask.h b/include/kcl/kcl_cpumask.h new file mode 100644 index 0000000000000..aee779d6ec5f2 --- /dev/null +++ b/include/kcl/kcl_cpumask.h @@ -0,0 +1,43 @@ +/*SPDX-License-Identifier: GPL-2.0*/ + +#include +#include +#include +#include +#include + +#ifndef for_each_cpu_wrap + +extern int _kcl_cpumask_next_wrap(int n, const struct cpumask *mask, + int start, bool wrap); + +static inline +int cpumask_next_wrap(int n, const struct cpumask *mask, + int start, bool wrap) +{ +return _kcl_cpumask_next_wrap(n, mask, start, wrap); +} + +/* Copied from include/linux/cpumask.h */ +#if NR_CPUS == 1 +#define for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start)) +#else +/** + * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * @start: the start location + * + * The implementation does not assume any bit in @mask is set (including @start). + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \ + (cpu) < nr_cpumask_bits; \ + (cpu) = cpumask_next_wrap((cpu), (mask), (start), true)) + +#endif +#endif + diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index d247f1e8550a2..b9b688183f1db 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -1076,37 +1076,6 @@ struct kfd_ioctl_ipc_import_handle_args { __u32 flags; /* from KFD */ }; -struct kfd_memory_range { - __u64 va_addr; - __u64 size; -}; - -/* flags definitions - * BIT0: 0: read operation, 1: write operation. 
- * This also identifies if the src or dst array belongs to remote process - */ -#define KFD_CROSS_MEMORY_RW_BIT (1 << 0) -#define KFD_SET_CROSS_MEMORY_READ(flags) (flags &= ~KFD_CROSS_MEMORY_RW_BIT) -#define KFD_SET_CROSS_MEMORY_WRITE(flags) (flags |= KFD_CROSS_MEMORY_RW_BIT) -#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT) - -struct kfd_ioctl_cross_memory_copy_args { - /* to KFD: Process ID of the remote process */ - __u32 pid; - /* to KFD: See above definition */ - __u32 flags; - /* to KFD: Source GPU VM range */ - __u64 src_mem_range_array; - /* to KFD: Size of above array */ - __u64 src_mem_array_size; - /* to KFD: Destination GPU VM range */ - __u64 dst_mem_range_array; - /* to KFD: Size of above array */ - __u64 dst_mem_array_size; - /* from KFD: Total amount of bytes copied */ - __u64 bytes_copied; -}; - /* Guarantee host access to memory */ #define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001 /* Fine grained coherency between all devices with access */ @@ -1395,9 +1364,6 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_DBG_TRAP \ AMDKFD_IOWR(0x82, struct kfd_ioctl_dbg_trap_args) -#define AMDKFD_IOC_CROSS_MEMORY_COPY \ - AMDKFD_IOWR(0x83, struct kfd_ioctl_cross_memory_copy_args) - #define AMDKFD_IOC_RLC_SPM \ AMDKFD_IOWR(0x84, struct kfd_ioctl_spm_args) @@ -1405,5 +1371,4 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_COMMAND_START_2 0x80 #define AMDKFD_COMMAND_END_2 0x85 - #endif diff --git a/kernel/fork.c b/kernel/fork.c index 518fba1f3abac..1e9ecaa4f5ac5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1401,7 +1401,6 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) return mm; } -EXPORT_SYMBOL_GPL(mm_access); static void complete_vfork_done(struct task_struct *tsk) {
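For reference, below is a minimal sketch of how driver code could drive the three helpers this patch introduces (amdgpu_dpm_get_entrycount_gfxoff(), amdgpu_dpm_set_residency_gfxoff() and amdgpu_dpm_get_residency_gfxoff()). It is an illustration only, not part of the patch: the function name example_log_gfxoff_stats() and the one-second sampling window are invented for the example, and the patch's real consumers are not shown in this excerpt. The call chain it exercises is the one added above: dpm wrapper -> smu_*_gfxoff() wrapper -> the ppt_funcs callbacks, which are only populated for vangogh here::

    /* Illustrative sketch only -- not part of the patch above. */
    #include <linux/delay.h>
    #include <linux/errno.h>
    #include <linux/types.h>

    #include "amdgpu.h"
    #include "amdgpu_dpm.h"

    static int example_log_gfxoff_stats(struct amdgpu_device *adev)
    {
            u64 count;
            u32 residency;
            int r;

            /*
             * Entry count accumulates since power-up; the PMFW resets it
             * when the device is suspended, so smu_suspend() above caches
             * the last value in adev->gfx.gfx_off_entrycount and the
             * vangogh callback adds it back in.
             */
            r = amdgpu_dpm_get_entrycount_gfxoff(adev, &count);
            if (r)
                    return r;

            /* Start a residency logging interval, wait, then stop it. */
            r = amdgpu_dpm_set_residency_gfxoff(adev, true);
            if (r)
                    return r;
            msleep(1000);   /* arbitrary window, for the example only */
            r = amdgpu_dpm_set_residency_gfxoff(adev, false);
            if (r)
                    return r;

            /* Stopping the log updates the cached residency value. */
            r = amdgpu_dpm_get_residency_gfxoff(adev, &residency);
            if (r)
                    return r;

            dev_info(adev->dev, "gfxoff entries %llu, residency %u\n",
                     count, residency);
            return 0;
    }

Note that the new dpm wrappers already take adev->pm.mutex and return -EOPNOTSUPP on parts without SW SMU support (and the smu_*_gfxoff() wrappers return -EINVAL when the vangogh-only callbacks are absent), so a caller like this only needs to check the return codes.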