diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst
index 8aeb0186c9ef8..5e27e4eb39596 100644
--- a/Documentation/gpu/amdgpu/thermal.rst
+++ b/Documentation/gpu/amdgpu/thermal.rst
@@ -63,3 +63,60 @@ gpu_metrics
 .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
    :doc: gpu_metrics
+
+GFXOFF
+======
+
+GFXOFF is a feature found in most recent GPUs that saves power at runtime. The
+card's RLC (RunList Controller) firmware powers off the gfx engine
+dynamically when there is no workload on gfx or compute pipes. GFXOFF is on by
+default on supported GPUs.
+
+Userspace can interact with GFXOFF through a debugfs interface (all values are
+`uint32_t`, unless otherwise noted):
+
+``amdgpu_gfxoff``
+-----------------
+
+Use it to enable/disable GFXOFF, and to check whether it is currently
+enabled/disabled::
+
+  $ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff
+  01
+
+- Write 0 to disable it, and 1 to enable it.
+- Reading 0 means it is disabled, 1 means it is enabled.
+
+If it is enabled, the GPU is free to enter GFXOFF mode as needed. Disabled
+means that it will never enter GFXOFF mode.
+
+``amdgpu_gfxoff_status``
+------------------------
+
+Read it to check the current GFXOFF status of a GPU::
+
+  $ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff_status
+  02
+
+- 0: GPU is in GFXOFF state, the gfx engine is powered down.
+- 1: Transition out of GFXOFF state
+- 2: Not in GFXOFF state
+- 3: Transition into GFXOFF state
+
+If GFXOFF is enabled, the value transitions between 0 and 3, settling at 0
+whenever possible. When it is disabled, it is always at 2. Returns ``-EINVAL``
+if it is not supported.
+
+``amdgpu_gfxoff_count``
+-----------------------
+
+Read it to get the total GFXOFF entry count at the time of query since system
+power-up. The value is a `uint64_t`; however, due to firmware limitations, it
+can currently overflow as a `uint32_t`. *Only supported in vangogh*
+
+``amdgpu_gfxoff_residency``
+---------------------------
+
+Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop. Read it to
+get the average GFXOFF residency percentage multiplied by 100 for the last
+logging interval. E.g. a value of 7854 means the GPU was in GFXOFF mode 78.54%
+of the time during the last logging interval.
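As an illustration of the residency workflow above, here is a minimal userspace
sketch (not part of this patch): it starts logging, waits, stops logging, and
reads back the frozen value. It assumes card index 0 (``dri/0``), debugfs
mounted at the default location, root privileges, and a device that actually
exposes the file; the ``RESIDENCY_PATH`` macro, the 10-second interval, and the
error handling are illustrative only::

  /* Hypothetical sketch of the amdgpu_gfxoff_residency debugfs usage. */
  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <unistd.h>

  #define RESIDENCY_PATH "/sys/kernel/debug/dri/0/amdgpu_gfxoff_residency"

  static int write_u32(int fd, uint32_t v)
  {
          /* The handler expects 4-byte aligned, 4-byte sized accesses. */
          return pwrite(fd, &v, sizeof(v), 0) == sizeof(v) ? 0 : -1;
  }

  int main(void)
  {
          uint32_t residency = 0;
          int fd = open(RESIDENCY_PATH, O_RDWR);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }

          if (write_u32(fd, 1))           /* start logging */
                  perror("start logging");
          sleep(10);                      /* let the GPU idle for a while */
          if (write_u32(fd, 0))           /* stop logging; value is now frozen */
                  perror("stop logging");

          if (pread(fd, &residency, sizeof(residency), 0) != sizeof(residency)) {
                  perror("read");
                  close(fd);
                  return 1;
          }

          /* Value is percent multiplied by 100, e.g. 7854 -> 78.54%. */
          printf("GFXOFF residency: %u.%02u%%\n", residency / 100, residency % 100);
          close(fd);
          return 0;
  }

Note that these debugfs files return raw native-endian integers rather than
ASCII text, which is why the examples above read them with ``xxd``.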
*Only supported in vangogh* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 1372e2b475418..89d93833f49b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -321,7 +321,7 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; - +#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 540178d166929..4c659178ed4b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -164,11 +164,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, enum TLB_FLUSH_TYPE flush_type); -int amdgpu_amdkfd_copy_mem_to_mem(struct amdgpu_device *adev, struct kgd_mem *src_mem, - uint64_t src_offset, struct kgd_mem *dst_mem, - uint64_t dest_offset, uint64_t size, struct dma_fence **f, - uint64_t *actual_size); - bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev); @@ -292,7 +287,7 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, - void *drm_priv, struct sg_table *sg, struct kgd_mem **mem, + void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags, bool criu_resume); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7f02641d3469e..0151a50124d0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1668,11 +1668,12 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, - void *drm_priv, struct sg_table *sg, struct kgd_mem **mem, + void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags, bool criu_resume) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); enum ttm_bo_type bo_type = ttm_bo_type_device; + struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; struct drm_gem_object *gobj = NULL; @@ -1713,10 +1714,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } } - if (sg) { - alloc_domain = AMDGPU_GEM_DOMAIN_CPU; - bo_type = ttm_bo_type_sg; - } *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); if (!*mem) { ret = -ENOMEM; @@ -3250,83 +3247,6 @@ int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem) return 0; } -int amdgpu_amdkfd_copy_mem_to_mem(struct amdgpu_device *adev, struct kgd_mem *src_mem, - uint64_t src_offset, struct kgd_mem *dst_mem, - uint64_t dst_offset, uint64_t size, - struct dma_fence **f, uint64_t *actual_size) -{ - struct amdgpu_copy_mem src, dst; - struct ww_acquire_ctx ticket; - struct list_head list, duplicates; - struct ttm_validate_buffer resv_list[2]; - struct dma_fence *fence = NULL; - int i, r; - - if (!adev|| !src_mem || !dst_mem || !actual_size) - return -EINVAL; - - *actual_size = 0; - - 
INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&duplicates); - - src.bo = &src_mem->bo->tbo; - dst.bo = &dst_mem->bo->tbo; - src.mem = src.bo->resource; - dst.mem = dst.bo->resource; - src.offset = src_offset; - dst.offset = dst_offset; - - resv_list[0].bo = src.bo; - resv_list[1].bo = dst.bo; - - for (i = 0; i < 2; i++) { - resv_list[i].num_shared = 1; - list_add_tail(&resv_list[i].head, &list); - } - - r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); - if (r) { - pr_err("Copy buffer failed. Unable to reserve bo (%d)\n", r); - return r; - } - - /* The process to which the Source and Dest BOs belong to could be - * evicted and the BOs invalidated. So validate BOs before use - */ - r = amdgpu_amdkfd_bo_validate(src_mem->bo, src_mem->domain, false); - if (r) { - pr_err("CMA fail: SRC BO validate failed %d\n", r); - goto validate_fail; - } - - - r = amdgpu_amdkfd_bo_validate(dst_mem->bo, dst_mem->domain, false); - if (r) { - pr_err("CMA fail: DST BO validate failed %d\n", r); - goto validate_fail; - } - - - r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size, false, NULL, - &fence); - if (r) - pr_err("Copy buffer failed %d\n", r); - else - *actual_size = size; - if (fence) { - amdgpu_bo_fence(src_mem->bo, fence, true); - amdgpu_bo_fence(dst_mem->bo, fence, true); - } - if (f) - *f = dma_fence_get(fence); - dma_fence_put(fence); - -validate_fail: - ttm_eu_backoff_reservation(&ticket, &list); - return r; -} - /* Returns GPU-specific tiling mode information */ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6ba5e8ac5e893..0db1aa66a4437 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -957,16 +957,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) continue; r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) { - mutex_unlock(&p->bo_list->bo_list_mutex); + if (r) return r; - } r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); - if (r) { - mutex_unlock(&p->bo_list->bo_list_mutex); + if (r) return r; - } } r = amdgpu_vm_handle_moved(adev, vm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 27670770b384e..dd78fb44cc135 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1061,6 +1061,157 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, return r; } +/** + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + * + * Read the last residency value logged. It doesn't auto update, one needs to + * stop logging before getting the current value. 
+ */ +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + uint32_t value; + + r = amdgpu_get_gfx_off_residency(adev, &value); + if (r) + goto out; + + r = put_user(value, (uint32_t *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + +/** + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency + * + * @f: open file handle + * @buf: User buffer to write data from + * @size: Number of bytes to write + * @pos: Offset to seek to + * + * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop + */ +static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u32 value; + + r = get_user(value, (uint32_t *)buf); + if (r) + goto out; + + amdgpu_set_gfx_off_residency(adev, value ? true : false); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + + +/** + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count + * + * @f: open file handle + * @buf: User buffer to store read data in + * @size: Number of bytes to read + * @pos: Offset to seek to + */ +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; + } + + while (size) { + u64 value = 0; + + r = amdgpu_get_gfx_off_entrycount(adev, &value); + if (r) + goto out; + + r = put_user(value, (u64 *)buf); + if (r) + goto out; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + r = result; +out: + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + + return r; +} + /** * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF * @@ -1268,6 +1419,19 @@ static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = { .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_count_read, + .llseek = default_llseek +}; + +static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_gfxoff_residency_read, + .write = amdgpu_debugfs_gfxoff_residency_write, + .llseek = default_llseek +}; + static const struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_regs_fops, &amdgpu_debugfs_regs2_fops, @@ -1280,6 +1444,8 @@ static const 
struct file_operations *debugfs_regs[] = { &amdgpu_debugfs_gpr_fops, &amdgpu_debugfs_gfxoff_fops, &amdgpu_debugfs_gfxoff_status_fops, + &amdgpu_debugfs_gfxoff_count_fops, + &amdgpu_debugfs_gfxoff_residency_fops, }; static const char *debugfs_regs_names[] = { @@ -1294,6 +1460,8 @@ static const char *debugfs_regs_names[] = { "amdgpu_gpr", "amdgpu_gfxoff", "amdgpu_gfxoff_status", + "amdgpu_gfxoff_count", + "amdgpu_gfxoff_residency", }; /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8f92d207f2b62..69b0a8b089f59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3594,6 +3594,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); adev->gfx.gfx_off_req_count = 1; + adev->gfx.gfx_off_residency = 0; + adev->gfx.gfx_off_entrycount = 0; adev->pm.ac_power = power_supply_is_system_supplied() > 0; atomic_set(&adev->throttling_logging_enabled, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 454a78ba60d43..ceb91469958aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) mutex_unlock(&adev->gfx.gfx_off_mutex); } +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_set_residency_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_get_residency_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value) +{ + int r = 0; + + mutex_lock(&adev->gfx.gfx_off_mutex); + + r = amdgpu_dpm_get_entrycount_gfxoff(adev, value); + + mutex_unlock(&adev->gfx.gfx_off_mutex); + + return r; +} + int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 1581067698cab..027e993ff45ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -343,10 +343,12 @@ struct amdgpu_gfx { uint32_t srbm_soft_reset; /* gfx off */ - bool gfx_off_state; /* true: enabled, false: disabled */ - struct mutex gfx_off_mutex; - uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */ - struct delayed_work gfx_off_delay_work; + bool gfx_off_state; /* true: enabled, false: disabled */ + struct mutex gfx_off_mutex; /* mutex to change gfxoff state */ + uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */ + struct delayed_work gfx_off_delay_work; /* async work to set gfx block off */ + uint32_t gfx_off_residency; /* last logged residency */ + uint64_t gfx_off_entrycount; /* count of times GPU has get into GFXOFF state */ /* pipe reservation */ struct mutex pipe_reserve_mutex; @@ -418,6 +420,10 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value); int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct 
ras_common_if *ras_block); +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value); +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency); +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value); int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 723721bdd6bf9..1d65e638f376f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -279,10 +279,6 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) /* Signal all jobs not yet scheduled */ for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { struct drm_sched_rq *rq = &sched->sched_rq[i]; - - if (!rq) - continue; - spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c index 33a8a7365aef9..f0e235f98afb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c @@ -28,13 +28,44 @@ #include "navi10_enum.h" #include "soc15_common.h" +#define regATHUB_MISC_CNTL_V3_0_1 0x00d7 +#define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0 + + +static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) +{ + uint32_t data; + + switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1); + break; + default: + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + break; + } + return data; +} + +static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) +{ + switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(3, 0, 1): + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data); + break; + default: + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + break; + } +} + static void athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { uint32_t def, data; - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + def = data = athub_v3_0_get_cg_cntl(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG)) data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; @@ -42,7 +73,7 @@ athub_v3_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; if (def != data) - WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + athub_v3_0_set_cg_cntl(adev, data); } static void @@ -51,7 +82,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, { uint32_t def, data; - def = data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + def = data = athub_v3_0_get_cg_cntl(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS)) data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; @@ -59,7 +90,7 @@ athub_v3_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; if (def != data) - WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + athub_v3_0_set_cg_cntl(adev, data); } int athub_v3_0_set_clockgating(struct amdgpu_device *adev, @@ -70,6 +101,7 @@ int athub_v3_0_set_clockgating(struct amdgpu_device *adev, switch (adev->ip_versions[ATHUB_HWIP][0]) { case IP_VERSION(3, 0, 0): + case IP_VERSION(3, 0, 1): case IP_VERSION(3, 0, 2): 
athub_v3_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -88,7 +120,7 @@ void athub_v3_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) int data; /* AMD_CG_SUPPORT_ATHUB_MGCG */ - data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + data = athub_v3_0_get_cg_cntl(adev); if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f61d7c7c9d262..8992e60c90b22 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4846,7 +4846,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 2; + adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 91a1c995df396..253ffa4c62be0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -126,6 +126,8 @@ MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin"); #define mmTCP_CHAN_STEER_0_ARCT 0x0b03 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 @@ -1496,7 +1498,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr; - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); if (err) goto out; @@ -1509,7 +1515,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); if (!err) { err = amdgpu_ucode_validate(adev->gfx.mec2_fw); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 482380fe76db3..750bb16dda36d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -418,6 +418,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? 
SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -436,7 +437,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); return -ETIME; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4f4aaed3a0974..382d1a18f6f03 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -894,6 +894,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -932,7 +933,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); return -ETIME; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index 39a696cd45b5e..29c3484ae1f16 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -40,6 +40,156 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, 0); } +static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + uint32_t hdp_mem_pwr_cntl; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD))) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + + /* Before doing clock/power mode switch, forced on MEM clock */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 1); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 1); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); + + /* disable clock and power gating before any changing */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 0); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* Already disabled above. 
The actions below are for "enabled" only */ + if (enable) { + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + } + + /* confirmed that ATOMIC/RC_MEM_POWER_CTRL_EN have to be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + } + } + + /* disable MEM clock override after clock/power mode changing */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + ATOMIC_MEM_CLK_SOFT_OVERRIDE, 0); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 0); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + + if (enable) { + hdp_clk_cntl &= + ~(uint32_t) + (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK); + } else { + hdp_clk_cntl |= HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK; + } + + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v5_2_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t tmp; + + /* AMD_CG_SUPPORT_HDP_MGCG */ + tmp = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); + if (!(tmp & (HDP_CLK_CNTL__ATOMIC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK))) + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + + /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ + tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_DS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK) + *flags |= 
AMD_CG_SUPPORT_HDP_SD; +} + +static void hdp_v5_2_update_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + hdp_v5_2_update_mem_power_gating(adev, enable); + hdp_v5_2_update_medium_grain_clock_gating(adev, enable); +} + const struct amdgpu_hdp_funcs hdp_v5_2_funcs = { .flush_hdp = hdp_v5_2_flush_hdp, + .update_clock_gating = hdp_v5_2_update_clock_gating, + .get_clock_gating_state = hdp_v5_2_get_clockgating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 92dc60a9d2094..085e613f3646d 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -727,6 +727,7 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = { static const struct amdgpu_ih_funcs ih_v6_0_funcs = { .get_wptr = ih_v6_0_get_wptr, .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, .set_rptr = ih_v6_0_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index cac72ced94c85..e8058edc1d108 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -518,18 +518,41 @@ static u64 mmhub_v3_0_1_get_mc_fb_offset(struct amdgpu_device *adev) static void mmhub_v3_0_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { - //TODO + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); } static void mmhub_v3_0_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable) { - //TODO + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); } static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state) { + if (amdgpu_sriov_vf(adev)) + return 0; + mmhub_v3_0_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); mmhub_v3_0_1_update_medium_grain_light_sleep(adev, @@ -539,7 +562,20 @@ static int mmhub_v3_0_1_set_clockgating(struct amdgpu_device *adev, static void mmhub_v3_0_1_get_clockgating(struct amdgpu_device *adev, u64 *flags) { - //TODO + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + /* AMD_CG_SUPPORT_MC_MGCG */ + if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; } const struct amdgpu_mmhub_funcs mmhub_v3_0_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 4b5396d3e60f6..eec13cb5bf758 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -409,9 +409,11 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. 
*/ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -483,6 +485,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index a2588200ea580..0b2ac418e4ac4 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -101,6 +101,16 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) adev->psp.dtm_context.context.bin_desc.start_addr = (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr + le32_to_cpu(ta_hdr->dtm.offset_bytes); + + if (adev->apu_flags & AMD_APU_IS_RENOIR) { + adev->psp.securedisplay_context.context.bin_desc.fw_version = + le32_to_cpu(ta_hdr->securedisplay.fw_version); + adev->psp.securedisplay_context.context.bin_desc.size_bytes = + le32_to_cpu(ta_hdr->securedisplay.size_bytes); + adev->psp.securedisplay_context.context.bin_desc.start_addr = + (uint8_t *)adev->psp.hdcp_context.context.bin_desc.start_addr + + le32_to_cpu(ta_hdr->securedisplay.offset_bytes); + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 77d549dbe2a8c..1ff7fc7bb3400 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -577,7 +577,9 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | AMD_CG_SUPPORT_ATHUB_MGCG | - AMD_CG_SUPPORT_ATHUB_LS; + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_SD; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | @@ -594,6 +596,13 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_REPEATER_FGCG | AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = @@ -692,6 +701,7 @@ static int soc21_common_set_clockgating_state(void *handle, switch (adev->ip_versions[NBIO_HWIP][0]) { case IP_VERSION(4, 3, 0): + case IP_VERSION(4, 3, 1): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, @@ -699,6 +709,10 @@ static int soc21_common_set_clockgating_state(void *handle, adev->hdp.funcs->update_clock_gating(adev, state == AMD_CG_STATE_GATE); break; + case IP_VERSION(7, 7, 0): + adev->hdp.funcs->update_clock_gating(adev, + state == AMD_CG_STATE_GATE); + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index ca14c3ef742ec..fb2d74f304481 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1115,7 +1115,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) * * Stop VCN block with dpg mode */ -static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { uint32_t tmp; @@ -1133,7 +1133,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) /* disable dynamic power gating mode */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); - return 0; } /** @@ -1154,7 
+1153,7 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v4_0_stop_dpg_mode(adev, i); + vcn_v4_0_stop_dpg_mode(adev, i); continue; } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index cdd599a081258..03b7066471f9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -334,9 +334,11 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. */ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -409,6 +411,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 3b4eb8285943c..2022ffbb8dba5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -385,9 +385,11 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, u32 wptr, tmp; struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) { + if (ih == &adev->irq.ih || ih == &adev->irq.ih_soft) { /* Only ring0 supports writeback. On other rings fall back * to register-based code with overflow checking below. + * ih_soft ring doesn't have any backing hardware registers, + * update wptr and return. 
*/ wptr = le32_to_cpu(*ih->wptr_cpu); @@ -461,6 +463,9 @@ static void vega20_ih_set_rptr(struct amdgpu_device *adev, { struct amdgpu_ih_regs *ih_regs; + if (ih == &adev->irq.ih_soft) + return; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; diff --git a/drivers/gpu/drm/amd/amdkcl/Makefile b/drivers/gpu/drm/amd/amdkcl/Makefile index 114f3861ac5ff..36e1f19709ea1 100644 --- a/drivers/gpu/drm/amd/amdkcl/Makefile +++ b/drivers/gpu/drm/amd/amdkcl/Makefile @@ -13,7 +13,7 @@ amdkcl-y += kcl_backlight.o kcl_ioctl.o \ kcl_device_cgroup.o kcl_mn.o kcl_drm_modes.o kcl_time.o kcl_ftrace.o \ kcl_acpi_table.o kcl_page_alloc.o kcl_numa.o kcl_fs_read_write.o kcl_drm_aperture.o \ kcl_drm_drv.o kcl_drm_simple_kms_helper.o kcl_bitmap.o kcl_vmscan.o kcl_dma_fence_chain.o \ - kcl_mce_amd.o kcl_workqueue.o + kcl_mce_amd.o kcl_workqueue.o kcl_cpumask.o amdkcl-$(CONFIG_DRM_AMD_DC_HDCP) += kcl_drm_hdcp.o amdkcl-$(CONFIG_MMU_NOTIFIER) += kcl_mn.o diff --git a/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c b/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c new file mode 100644 index 0000000000000..fe36b386ff52b --- /dev/null +++ b/drivers/gpu/drm/amd/amdkcl/kcl_cpumask.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#ifndef for_each_cpu_wrap +/* copied from lib/cpumask.c +/** + * cpumask_next_wrap - helper to implement for_each_cpu_wrap + * @n: the cpu prior to the place to search + * @mask: the cpumask pointer + * @start: the start point of the iteration + * @wrap: assume @n crossing @start terminates the iteration + * + * Returns >= nr_cpu_ids on completion + * + * Note: the @wrap argument is required for the start condition when + * we cannot assume @start is set in @mask. + */ +int _kcl_cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) +{ + int next; + +again: + next = cpumask_next(n, mask); + + if (wrap && n < start && next >= start) { + return nr_cpumask_bits; + + } else if (next >= nr_cpumask_bits) { + wrap = true; + n = -1; + goto again; + } + + return next; +} +EXPORT_SYMBOL(_kcl_cpumask_next_wrap); +#endif + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index d7fb67b63f4bf..f16b7206c858f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1150,7 +1150,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( dev->adev, args->va_addr, args->size, - pdd->drm_priv, NULL, (struct kgd_mem **) &mem, &offset, + pdd->drm_priv, (struct kgd_mem **) &mem, &offset, flags, false); if (err) @@ -1628,867 +1628,6 @@ static int kfd_ioctl_ipc_import_handle(struct file *filep, return r; } -#ifndef PTRACE_MODE_ATTACH_REALCREDS -#define PTRACE_MODE_ATTACH_REALCREDS PTRACE_MODE_ATTACH -#endif - -/* Maximum number of entries for process pages array which lives on stack */ -#define MAX_PP_STACK_COUNT 16 -/* Maximum number of pages kmalloc'd to hold struct page's during copy */ -#define MAX_KMALLOC_PAGES (PAGE_SIZE * 2) -#define MAX_PP_KMALLOC_COUNT (MAX_KMALLOC_PAGES/sizeof(struct page *)) - -static void kfd_put_sg_table(struct sg_table *sg) -{ - unsigned int i; - struct scatterlist *s; - - for_each_sg(sg->sgl, s, sg->nents, i) - put_page(sg_page(s)); -} - - -/* Create a sg table for the given userptr BO by pinning its system pages - * @bo: userptr BO - * @offset: Offset into BO - * @mm/@task: mm_struct & task_struct of the process that holds the BO - * @size: in/out: desired size / 
actual size which could be smaller - * @sg_size: out: Size of sg table. This is ALIGN_UP(@size) - * @ret_sg: out sg table - */ -static int kfd_create_sg_table_from_userptr_bo(struct kfd_bo *bo, - int64_t offset, int cma_write, - struct mm_struct *mm, - struct task_struct *task, - uint64_t *size, - uint64_t *sg_size, - struct sg_table **ret_sg) -{ - int ret, locked = 1; - struct sg_table *sg = NULL; - unsigned int i, offset_in_page, flags = 0; - unsigned long nents, n; - unsigned long pa = (bo->cpuva + offset) & PAGE_MASK; - unsigned int cur_page = 0; - struct scatterlist *s; - uint64_t sz = *size; - struct page **process_pages; - - *sg_size = 0; - sg = kmalloc(sizeof(*sg), GFP_KERNEL); - if (!sg) - return -ENOMEM; - - offset_in_page = offset & (PAGE_SIZE - 1); - nents = (sz + offset_in_page + PAGE_SIZE - 1) / PAGE_SIZE; - - ret = sg_alloc_table(sg, nents, GFP_KERNEL); - if (unlikely(ret)) { - ret = -ENOMEM; - goto sg_alloc_fail; - } - process_pages = kmalloc_array(nents, sizeof(struct pages *), - GFP_KERNEL); - if (!process_pages) { - ret = -ENOMEM; - goto page_alloc_fail; - } - - if (cma_write) - flags = FOLL_WRITE; - locked = 1; - mmap_read_lock(mm); - n = kcl_get_user_pages_remote(task, mm, pa, nents, flags, process_pages, - NULL, &locked); - if (locked) - mmap_read_unlock(mm); - if (n <= 0) { - pr_err("CMA: Invalid virtual address 0x%lx\n", pa); - ret = -EFAULT; - goto get_user_fail; - } - if (n != nents) { - /* Pages pinned < requested. Set the size accordingly */ - *size = (n * PAGE_SIZE) - offset_in_page; - pr_debug("Requested %lx but pinned %lx\n", nents, n); - } - - sz = 0; - for_each_sg(sg->sgl, s, n, i) { - sg_set_page(s, process_pages[cur_page], PAGE_SIZE, - offset_in_page); - sg_dma_address(s) = page_to_phys(process_pages[cur_page]); - offset_in_page = 0; - cur_page++; - sz += PAGE_SIZE; - } - *ret_sg = sg; - *sg_size = sz; - - kfree(process_pages); - return 0; - -get_user_fail: - kfree(process_pages); -page_alloc_fail: - sg_free_table(sg); -sg_alloc_fail: - kfree(sg); - return ret; -} - -static void kfd_free_cma_bos(struct cma_iter *ci) -{ - struct cma_system_bo *cma_bo, *tmp; - - list_for_each_entry_safe(cma_bo, tmp, &ci->cma_list, list) { - struct kfd_dev *dev = cma_bo->dev; - struct kfd_process_device *pdd; - - /* sg table is deleted by free_memory_of_gpu */ - if (cma_bo->sg) - kfd_put_sg_table(cma_bo->sg); - pdd = kfd_get_process_device_data(dev, ci->p); - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, cma_bo->mem, pdd->drm_priv, NULL); - list_del(&cma_bo->list); - kfree(cma_bo); - } -} - -/* 1 second timeout */ -#define CMA_WAIT_TIMEOUT msecs_to_jiffies(1000) - -static int kfd_cma_fence_wait(struct dma_fence *f) -{ - int ret; - - ret = dma_fence_wait_timeout(f, false, CMA_WAIT_TIMEOUT); - if (likely(ret > 0)) - return 0; - if (!ret) - ret = -ETIME; - return ret; -} - -/* Put previous (old) fence @pf but it waits for @pf to signal if the context - * of the current fence @cf is different. - */ -static int kfd_fence_put_wait_if_diff_context(struct dma_fence *cf, - struct dma_fence *pf) -{ - int ret = 0; - - if (pf && cf && cf->context != pf->context) - ret = kfd_cma_fence_wait(pf); - dma_fence_put(pf); - return ret; -} - -#define MAX_SYSTEM_BO_SIZE (512*PAGE_SIZE) - -/* Create an equivalent system BO for the given @bo. If @bo is a userptr then - * create a new system BO by pinning underlying system pages of the given - * userptr BO. If @bo is in Local Memory then create an empty system BO and - * then copy @bo into this new BO. 
- * @bo: Userptr BO or Local Memory BO - * @offset: Offset into bo - * @size: in/out: The size of the new BO could be less than requested if all - * the pages couldn't be pinned or size > MAX_SYSTEM_BO_SIZE. This would - * be reflected in @size - * @mm/@task: mm/task to which @bo belongs to - * @cma_bo: out: new system BO - */ -static int kfd_create_cma_system_bo(struct kfd_dev *kdev, struct kfd_bo *bo, - uint64_t *size, uint64_t offset, - int cma_write, struct kfd_process *p, - struct mm_struct *mm, - struct task_struct *task, - struct cma_system_bo **cma_bo) -{ - int ret; - struct kfd_process_device *pdd = NULL; - struct cma_system_bo *cbo; - uint64_t bo_size = 0; - struct dma_fence *f; - - uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | - KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE; - - *cma_bo = NULL; - cbo = kzalloc(sizeof(**cma_bo), GFP_KERNEL); - if (!cbo) - return -ENOMEM; - - INIT_LIST_HEAD(&cbo->list); - if (bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) - bo_size = min_t(uint64_t, *size, MAX_SYSTEM_BO_SIZE); - else if (bo->cpuva) { - ret = kfd_create_sg_table_from_userptr_bo(bo, offset, - cma_write, mm, task, - size, &bo_size, - &cbo->sg); - if (ret) { - pr_err("CMA: BO create with sg failed %d\n", ret); - goto sg_fail; - } - } else { - WARN_ON(1); - ret = -EINVAL; - goto sg_fail; - } - mutex_lock(&p->mutex); - pdd = kfd_get_process_device_data(kdev, p); - if (!pdd) { - mutex_unlock(&p->mutex); - pr_err("Process device data doesn't exist\n"); - ret = -EINVAL; - goto pdd_fail; - } - - ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, 0ULL, bo_size, - pdd->drm_priv, cbo->sg, - &cbo->mem, NULL, flags, - false); - mutex_unlock(&p->mutex); - if (ret) { - pr_err("Failed to create shadow system BO %d\n", ret); - goto pdd_fail; - } - - if (bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - ret = amdgpu_amdkfd_copy_mem_to_mem(kdev->adev, bo->mem, - offset, cbo->mem, 0, - bo_size, &f, size); - if (ret) { - pr_err("CMA: Intermediate copy failed %d\n", ret); - goto copy_fail; - } - - /* Wait for the copy to finish as subsequent copy will be done - * by different device - */ - ret = kfd_cma_fence_wait(f); - dma_fence_put(f); - if (ret) { - pr_err("CMA: Intermediate copy timed out %d\n", ret); - goto copy_fail; - } - } - - cbo->dev = kdev; - *cma_bo = cbo; - - return ret; - -copy_fail: - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, bo->mem, pdd->drm_priv, NULL); -pdd_fail: - if (cbo->sg) { - kfd_put_sg_table(cbo->sg); - sg_free_table(cbo->sg); - kfree(cbo->sg); - } -sg_fail: - kfree(cbo); - return ret; -} - -/* Update cma_iter.cur_bo with KFD BO that is assocaited with - * cma_iter.array.va_addr - */ -static int kfd_cma_iter_update_bo(struct cma_iter *ci) -{ - struct kfd_memory_range *arr = ci->array; - uint64_t va_end = arr->va_addr + arr->size - 1; - - mutex_lock(&ci->p->mutex); - ci->cur_bo = kfd_process_find_bo_from_interval(ci->p, arr->va_addr, - va_end); - mutex_unlock(&ci->p->mutex); - - if (!ci->cur_bo || va_end > ci->cur_bo->it.last) { - pr_err("CMA failed. Range out of bounds\n"); - return -EFAULT; - } - return 0; -} - -/* Advance iter by @size bytes. */ -static int kfd_cma_iter_advance(struct cma_iter *ci, unsigned long size) -{ - int ret = 0; - - ci->offset += size; - if (WARN_ON(size > ci->total || ci->offset > ci->array->size)) - return -EFAULT; - ci->total -= size; - /* If current range is copied, move to next range if available. 
*/ - if (ci->offset == ci->array->size) { - - /* End of all ranges */ - if (!(--ci->nr_segs)) - return 0; - - ci->array++; - ci->offset = 0; - ret = kfd_cma_iter_update_bo(ci); - if (ret) - return ret; - } - ci->bo_offset = (ci->array->va_addr + ci->offset) - - ci->cur_bo->it.start; - return ret; -} - -static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs, - struct kfd_process *p, struct mm_struct *mm, - struct task_struct *task, struct cma_iter *ci) -{ - int ret; - int nr; - - if (!arr || !segs) - return -EINVAL; - - memset(ci, 0, sizeof(*ci)); - INIT_LIST_HEAD(&ci->cma_list); - ci->array = arr; - ci->nr_segs = segs; - ci->p = p; - ci->offset = 0; - ci->mm = mm; - ci->task = task; - for (nr = 0; nr < segs; nr++) - ci->total += arr[nr].size; - - /* Valid but size is 0. So copied will also be 0 */ - if (!ci->total) - return 0; - - ret = kfd_cma_iter_update_bo(ci); - if (!ret) - ci->bo_offset = arr->va_addr - ci->cur_bo->it.start; - return ret; -} - -static bool kfd_cma_iter_end(struct cma_iter *ci) -{ - if (!(ci->nr_segs) || !(ci->total)) - return true; - return false; -} - -/* Copies @size bytes from si->cur_bo to di->cur_bo BO. The function assumes - * both source and dest. BOs are userptr BOs. Both BOs can either belong to - * current process or one of the BOs can belong to a differnt - * process. @Returns 0 on success, -ve on failure - * - * @si: Source iter - * @di: Dest. iter - * @cma_write: Indicates if it is write to remote or read from remote - * @size: amount of bytes to be copied - * @copied: Return number of bytes actually copied. - */ -static int kfd_copy_userptr_bos(struct cma_iter *si, struct cma_iter *di, - bool cma_write, uint64_t size, - uint64_t *copied) -{ - int i, ret = 0, locked; - unsigned int nents, nl; - unsigned int offset_in_page; - struct page *pp_stack[MAX_PP_STACK_COUNT]; - struct page **process_pages = pp_stack; - unsigned long rva, lva = 0, flags = 0; - uint64_t copy_size, to_copy = size; - struct cma_iter *li, *ri; - - if (cma_write) { - ri = di; - li = si; - flags |= FOLL_WRITE; - } else { - li = di; - ri = si; - } - /* rva: remote virtual address. Page aligned to start page. - * rva + offset_in_page: Points to remote start address - * lva: local virtual address. Points to the start address. 
- * nents: computes number of remote pages to request - */ - offset_in_page = ri->bo_offset & (PAGE_SIZE - 1); - rva = (ri->cur_bo->cpuva + ri->bo_offset) & PAGE_MASK; - lva = li->cur_bo->cpuva + li->bo_offset; - - nents = (size + offset_in_page + PAGE_SIZE - 1) / PAGE_SIZE; - - copy_size = min_t(uint64_t, size, PAGE_SIZE - offset_in_page); - *copied = 0; - - if (nents > MAX_PP_STACK_COUNT) { - /* For reliability kmalloc only 2 pages worth */ - process_pages = kmalloc(min_t(size_t, MAX_KMALLOC_PAGES, - sizeof(struct pages *)*nents), - GFP_KERNEL); - - if (!process_pages) - return -ENOMEM; - } - - while (nents && to_copy) { - nl = min_t(unsigned int, MAX_PP_KMALLOC_COUNT, nents); - locked = 1; - mmap_read_lock(ri->mm); - nl = kcl_get_user_pages_remote(ri->task, ri->mm, rva, nl, - flags, process_pages, NULL, - &locked); - if (locked) - mmap_read_unlock(ri->mm); - if (nl <= 0) { - pr_err("CMA: Invalid virtual address 0x%lx\n", rva); - ret = -EFAULT; - break; - } - - for (i = 0; i < nl; i++) { - unsigned int n; - void *kaddr = kmap(process_pages[i]); - - if (cma_write) { - n = copy_from_user(kaddr+offset_in_page, - (void *)lva, copy_size); - set_page_dirty(process_pages[i]); - } else { - n = copy_to_user((void *)lva, - kaddr+offset_in_page, - copy_size); - } - kunmap(kaddr); - if (n) { - ret = -EFAULT; - break; - } - to_copy -= copy_size; - if (!to_copy) - break; - lva += copy_size; - rva += (copy_size + offset_in_page); - WARN_ONCE(rva & (PAGE_SIZE - 1), - "CMA: Error in remote VA computation"); - offset_in_page = 0; - copy_size = min_t(uint64_t, to_copy, PAGE_SIZE); - } - - for (i = 0; i < nl; i++) - put_page(process_pages[i]); - - if (ret) - break; - nents -= nl; - } - - if (process_pages != pp_stack) - kfree(process_pages); - - *copied = (size - to_copy); - return ret; - -} - -static int kfd_create_kgd_mem(struct kfd_dev *kdev, uint64_t size, - struct kfd_process *p, struct kgd_mem **mem) -{ - int ret; - struct kfd_process_device *pdd = NULL; - uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | - KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE; - - if (!mem || !size || !p || !kdev) - return -EINVAL; - - *mem = NULL; - - mutex_lock(&p->mutex); - pdd = kfd_get_process_device_data(kdev, p); - if (!pdd) { - mutex_unlock(&p->mutex); - pr_err("Process device data doesn't exist\n"); - return -EINVAL; - } - - ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, 0ULL, size, - pdd->drm_priv, NULL, - mem, NULL, flags, false); - mutex_unlock(&p->mutex); - if (ret) { - pr_err("Failed to create shadow system BO %d\n", ret); - return -EINVAL; - } - - return 0; -} - -static int kfd_destroy_kgd_mem(struct kgd_mem *mem) -{ - struct amdgpu_device *adev; - struct task_struct *task; - struct kfd_process *p; - struct kfd_process_device *pdd; - uint32_t gpu_id, gpu_idx; - int r; - - if (!mem) - return -EINVAL; - - adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); - task = get_pid_task(mem->process_info->pid, PIDTYPE_PID); - p = kfd_get_process(task); - r = kfd_process_gpuid_from_adev(p, adev, &gpu_id, &gpu_idx); - if (r < 0) { - pr_warn("no gpu id found, mem maybe leaking\n"); - return -EINVAL; - } - pdd = kfd_process_device_from_gpuidx(p, gpu_idx); - - /* param adev is not used*/ - return amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem, pdd->drm_priv, NULL); -} - -/* Copies @size bytes from si->cur_bo to di->cur_bo starting at their - * respective offset. - * @si: Source iter - * @di: Dest. 
iter - * @cma_write: Indicates if it is write to remote or read from remote - * @size: amount of bytes to be copied - * @f: Return the last fence if any - * @copied: Return number of bytes actually copied. - */ -static int kfd_copy_bos(struct cma_iter *si, struct cma_iter *di, - int cma_write, uint64_t size, - struct dma_fence **f, uint64_t *copied, - struct kgd_mem **tmp_mem) -{ - int err = 0; - struct kfd_bo *dst_bo = di->cur_bo, *src_bo = si->cur_bo; - uint64_t src_offset = si->bo_offset, dst_offset = di->bo_offset; - struct kgd_mem *src_mem = src_bo->mem, *dst_mem = dst_bo->mem; - struct kfd_dev *dev = dst_bo->dev; - int d2d = 0; - - *copied = 0; - if (f) - *f = NULL; - if (src_bo->cpuva && dst_bo->cpuva) - return kfd_copy_userptr_bos(si, di, cma_write, size, copied); - - /* If either source or dest. is userptr, create a shadow system BO - * by using the underlying userptr BO pages. Then use this shadow - * BO for copy. src_offset & dst_offset are adjusted because the new BO - * is only created for the window (offset, size) requested. - * The shadow BO is created on the other device. This means if the - * other BO is a device memory, the copy will be using that device. - * The BOs are stored in cma_list for deferred cleanup. This minimizes - * fence waiting just to the last fence. - */ - if (src_bo->cpuva) { - dev = dst_bo->dev; - err = kfd_create_cma_system_bo(dev, src_bo, &size, - si->bo_offset, cma_write, - si->p, si->mm, si->task, - &si->cma_bo); - src_mem = si->cma_bo->mem; - src_offset = si->bo_offset & (PAGE_SIZE - 1); - list_add_tail(&si->cma_bo->list, &si->cma_list); - } else if (dst_bo->cpuva) { - dev = src_bo->dev; - err = kfd_create_cma_system_bo(dev, dst_bo, &size, - di->bo_offset, cma_write, - di->p, di->mm, di->task, - &di->cma_bo); - dst_mem = di->cma_bo->mem; - dst_offset = di->bo_offset & (PAGE_SIZE - 1); - list_add_tail(&di->cma_bo->list, &di->cma_list); - } else if (src_bo->dev->adev != dst_bo->dev->adev) { - /* This indicates that atleast on of the BO is in local mem. - * If both are in local mem of different devices then create an - * intermediate System BO and do a double copy - * [VRAM]--gpu1-->[System BO]--gpu2-->[VRAM]. - * If only one BO is in VRAM then use that GPU to do the copy - */ - if (src_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM && - dst_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { - dev = dst_bo->dev; - size = min_t(uint64_t, size, MAX_SYSTEM_BO_SIZE); - d2d = 1; - - if (*tmp_mem == NULL) { - if (kfd_create_kgd_mem(src_bo->dev, - MAX_SYSTEM_BO_SIZE, - si->p, - tmp_mem)) - return -EINVAL; - } - - if (amdgpu_amdkfd_copy_mem_to_mem(src_bo->dev->adev, - src_bo->mem, si->bo_offset, - *tmp_mem, 0, - size, f, &size)) - /* tmp_mem will be freed in caller.*/ - return -EINVAL; - - kfd_cma_fence_wait(*f); - dma_fence_put(*f); - - src_mem = *tmp_mem; - src_offset = 0; - } else if (src_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) - dev = src_bo->dev; - /* else already set to dst_bo->dev */ - } - - if (err) { - pr_err("Failed to create system BO %d", err); - return -EINVAL; - } - - err = amdgpu_amdkfd_copy_mem_to_mem(dev->adev, src_mem, src_offset, - dst_mem, dst_offset, size, f, - copied); - /* The tmp_bo allocates additional memory. So it is better to wait and - * delete. Also since multiple GPUs are involved the copies are - * currently not pipelined. - */ - if (*tmp_mem && d2d) { - if (!err) { - kfd_cma_fence_wait(*f); - dma_fence_put(*f); - *f = NULL; - } - } - return err; -} - -/* Copy single range from source iterator @si to destination iterator @di. 
- * @si will move to next range and @di will move by bytes copied. - * @return : 0 for success or -ve for failure - * @f: The last fence if any - * @copied: out: number of bytes copied - */ -static int kfd_copy_single_range(struct cma_iter *si, struct cma_iter *di, - bool cma_write, struct dma_fence **f, - uint64_t *copied, struct kgd_mem **tmp_mem) -{ - int err = 0; - uint64_t copy_size, n; - uint64_t size = si->array->size; - struct kfd_bo *src_bo = si->cur_bo; - struct dma_fence *lfence = NULL; - - if (!src_bo || !di || !copied) - return -EINVAL; - *copied = 0; - if (f) - *f = NULL; - - while (size && !kfd_cma_iter_end(di)) { - struct dma_fence *fence = NULL; - - copy_size = min(size, (di->array->size - di->offset)); - - err = kfd_copy_bos(si, di, cma_write, copy_size, - &fence, &n, tmp_mem); - if (err) { - pr_err("CMA %d failed\n", err); - break; - } - - if (fence) { - err = kfd_fence_put_wait_if_diff_context(fence, - lfence); - lfence = fence; - if (err) - break; - } - - size -= n; - *copied += n; - err = kfd_cma_iter_advance(si, n); - if (err) - break; - err = kfd_cma_iter_advance(di, n); - if (err) - break; - } - - if (f) - *f = dma_fence_get(lfence); - dma_fence_put(lfence); - - return err; -} - -static int kfd_ioctl_cross_memory_copy(struct file *filep, - struct kfd_process *local_p, void *data) -{ - struct kfd_ioctl_cross_memory_copy_args *args = data; - struct kfd_memory_range *src_array, *dst_array; - struct kfd_process *remote_p; - struct task_struct *remote_task; - struct mm_struct *remote_mm; - struct pid *remote_pid; - struct dma_fence *lfence = NULL; - uint64_t copied = 0, total_copied = 0; - struct cma_iter di, si; - const char *cma_op; - int err = 0; - struct kgd_mem *tmp_mem = NULL; - - /* Check parameters */ - if (args->src_mem_range_array == 0 || args->dst_mem_range_array == 0 || - args->src_mem_array_size == 0 || args->dst_mem_array_size == 0) - return -EINVAL; - args->bytes_copied = 0; - - /* Allocate space for source and destination arrays */ - src_array = kmalloc_array((args->src_mem_array_size + - args->dst_mem_array_size), - sizeof(struct kfd_memory_range), - GFP_KERNEL); - if (!src_array) - return -ENOMEM; - dst_array = &src_array[args->src_mem_array_size]; - - if (copy_from_user(src_array, (void __user *)args->src_mem_range_array, - args->src_mem_array_size * - sizeof(struct kfd_memory_range))) { - err = -EFAULT; - goto copy_from_user_fail; - } - if (copy_from_user(dst_array, (void __user *)args->dst_mem_range_array, - args->dst_mem_array_size * - sizeof(struct kfd_memory_range))) { - err = -EFAULT; - goto copy_from_user_fail; - } - - /* Get remote process */ - remote_pid = find_get_pid(args->pid); - if (!remote_pid) { - pr_err("Cross mem copy failed. Invalid PID %d\n", args->pid); - err = -ESRCH; - goto copy_from_user_fail; - } - - remote_task = get_pid_task(remote_pid, PIDTYPE_PID); - if (!remote_pid) { - pr_err("Cross mem copy failed. Invalid PID or task died %d\n", - args->pid); - err = -ESRCH; - goto get_pid_task_fail; - } - - /* Check access permission */ - remote_mm = mm_access(remote_task, PTRACE_MODE_ATTACH_REALCREDS); - if (!remote_mm || IS_ERR(remote_mm)) { - err = IS_ERR(remote_mm) ? PTR_ERR(remote_mm) : -ESRCH; - if (err == -EACCES) { - pr_err("Cross mem copy failed. Permission error\n"); - err = -EPERM; - } else - pr_err("Cross mem copy failed. Invalid task %d\n", - err); - goto mm_access_fail; - } - - remote_p = kfd_get_process(remote_task); - if (IS_ERR(remote_p)) { - pr_err("Cross mem copy failed. 
Invalid kfd process %d\n", - args->pid); - err = -EINVAL; - goto kfd_process_fail; - } - /* Initialise cma_iter si & @di with source & destination range. */ - if (KFD_IS_CROSS_MEMORY_WRITE(args->flags)) { - cma_op = "WRITE"; - pr_debug("CMA WRITE: local -> remote\n"); - err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size, - remote_p, remote_mm, remote_task, &di); - if (err) - goto kfd_process_fail; - err = kfd_cma_iter_init(src_array, args->src_mem_array_size, - local_p, current->mm, current, &si); - if (err) - goto kfd_process_fail; - } else { - cma_op = "READ"; - pr_debug("CMA READ: remote -> local\n"); - - err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size, - local_p, current->mm, current, &di); - if (err) - goto kfd_process_fail; - err = kfd_cma_iter_init(src_array, args->src_mem_array_size, - remote_p, remote_mm, remote_task, &si); - if (err) - goto kfd_process_fail; - } - - /* Copy one si range at a time into di. After each call to - * kfd_copy_single_range() si will move to next range. di will be - * incremented by bytes copied - */ - while (!kfd_cma_iter_end(&si) && !kfd_cma_iter_end(&di)) { - struct dma_fence *fence = NULL; - - err = kfd_copy_single_range(&si, &di, - KFD_IS_CROSS_MEMORY_WRITE(args->flags), - &fence, &copied, &tmp_mem); - total_copied += copied; - - if (err) - break; - - /* Release old fence if a later fence is created. If no - * new fence is created, then keep the preivous fence - */ - if (fence) { - err = kfd_fence_put_wait_if_diff_context(fence, - lfence); - lfence = fence; - if (err) - break; - } - } - - /* Wait for the last fence irrespective of error condition */ - if (lfence) { - err = kfd_cma_fence_wait(lfence); - dma_fence_put(lfence); - if (err) - pr_err("CMA %s failed. BO timed out\n", cma_op); - } - - if (tmp_mem) - kfd_destroy_kgd_mem(tmp_mem); - - kfd_free_cma_bos(&si); - kfd_free_cma_bos(&di); - -kfd_process_fail: - mmput(remote_mm); -mm_access_fail: - put_task_struct(remote_task); -get_pid_task_fail: - put_pid(remote_pid); -copy_from_user_fail: - kfree(src_array); - - /* An error could happen after partial copy. 
In that case this will - * reflect partial amount of bytes copied - */ - args->bytes_copied = total_copied; - return err; -} - static int kfd_ioctl_dbg_set_debug_trap(struct file *filep, struct kfd_process *p, void *data) { @@ -3447,7 +2586,7 @@ static int criu_restore_memory_of_gpu_ipc(struct kfd_process_device *pdd, */ ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(dev->adev, bo_bucket->addr, bo_bucket->size, pdd->drm_priv, - NULL, kgd_mem, &offset, + kgd_mem, &offset, bo_bucket->alloc_flags, true); if (ret) { pr_err("Could not create the BO\n"); @@ -3538,7 +2677,7 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, } /* Create the BO */ ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr, - bo_bucket->size, pdd->drm_priv, NULL, kgd_mem, + bo_bucket->size, pdd->drm_priv, kgd_mem, &offset, bo_bucket->alloc_flags, criu_resume); if (ret) { pr_err("Could not create the BO\n"); @@ -4086,9 +3225,6 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_IPC_EXPORT_HANDLE, kfd_ioctl_ipc_export_handle, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_CROSS_MEMORY_COPY, - kfd_ioctl_cross_memory_copy, 0), - AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP, kfd_ioctl_dbg_set_debug_trap, 0), diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 07b9fe4016a60..bc99d2d490abf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" @@ -102,13 +103,18 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) switch (sdma_version) { case IP_VERSION(6, 0, 0): - case IP_VERSION(6, 0, 1): case IP_VERSION(6, 0, 2): /* Reserve 1 for paging and 1 for gfx */ kfd->device_info.num_reserved_sdma_queues_per_engine = 2; /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */ kfd->device_info.reserved_sdma_queues_bitmap = 0xFULL; break; + case IP_VERSION(6, 0, 1): + /* Reserve 1 for paging and 1 for gfx */ + kfd->device_info.num_reserved_sdma_queues_per_engine = 2; + /* BIT(0)=engine-0 queue-0; BIT(1)=engine-0 queue-1; ... 
*/ + kfd->device_info.reserved_sdma_queues_bitmap = 0x3ULL; + break; default: break; } @@ -814,13 +820,24 @@ static inline void kfd_queue_work(struct workqueue_struct *wq, struct work_struct *work) { int cpu, new_cpu; + const struct cpumask *mask = NULL; cpu = new_cpu = smp_processor_id(); - do { - new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; - if (cpu_to_node(new_cpu) == numa_node_id()) + +#if defined(CONFIG_SCHED_SMT) + /* CPU threads in the same core */ + mask = cpu_smt_mask(cpu); +#endif + if (!mask || cpumask_weight(mask) <= 1) + /* CPU threads in the same NUMA node */ + mask = cpu_cpu_mask(cpu); + /* Pick the next online CPU thread in the same core or NUMA node */ + for_each_cpu_wrap(cpu, mask, cpu+1) { + if (cpu != new_cpu && cpu_online(cpu)) { + new_cpu = cpu; break; - } while (cpu != new_cpu); + } + } queue_work_on(new_cpu, wq, work); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index db2a8a070b695..182eb67edbc52 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -383,38 +383,6 @@ struct kfd_bo { unsigned int mem_type; }; -struct cma_system_bo { - struct kgd_mem *mem; - struct sg_table *sg; - struct kfd_dev *dev; - struct list_head list; -}; - -/* Similar to iov_iter */ -struct cma_iter { - /* points to current entry of range array */ - struct kfd_memory_range *array; - /* total number of entries in the initial array */ - unsigned long nr_segs; - /* total amount of data pointed by kfd array*/ - unsigned long total; - /* offset into the entry pointed by cma_iter.array */ - unsigned long offset; - struct kfd_process *p; - struct mm_struct *mm; - struct task_struct *task; - /* current kfd_bo associated with cma_iter.array.va_addr */ - struct kfd_bo *cur_bo; - /* offset w.r.t cur_bo */ - unsigned long bo_offset; - /* If cur_bo is a userptr BO, then a shadow system BO is created - * using its underlying pages. cma_bo holds this BO. 
cma_list is a - * list cma_bos created in one session - */ - struct cma_system_bo *cma_bo; - struct list_head cma_list; -}; - enum kfd_mempool { KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 43697b3e4c9c2..5198dd636765a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -717,7 +717,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, int err; err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size, - pdd->drm_priv, NULL, mem, NULL, + pdd->drm_priv, mem, NULL, flags, false); if (err) goto err_alloc_mem; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 4b9d2a15fb085..e5708de28ff3b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -545,7 +545,6 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, kfree(svm_bo); return -ESRCH; } - svm_bo->svms = prange->svms; svm_bo->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), mm, @@ -3277,7 +3276,6 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) static void svm_range_evict_svm_bo_worker(struct work_struct *work) { struct svm_range_bo *svm_bo; - struct kfd_process *p; struct mm_struct *mm; int r = 0; @@ -3285,13 +3283,12 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) if (!svm_bo_ref_unless_zero(svm_bo)) return; /* svm_bo was freed while eviction was pending */ - /* svm_range_bo_release destroys this worker thread. So during - * the lifetime of this thread, kfd_process and mm will be valid. - */ - p = container_of(svm_bo->svms, struct kfd_process, svms); - mm = p->mm; - if (!mm) + if (mmget_not_zero(svm_bo->eviction_fence->mm)) { + mm = svm_bo->eviction_fence->mm; + } else { + svm_range_bo_unref(svm_bo); return; + } mmap_read_lock(mm); spin_lock(&svm_bo->list_lock); @@ -3309,8 +3306,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) mutex_lock(&prange->migrate_mutex); do { - r = svm_migrate_vram_to_ram(prange, - svm_bo->eviction_fence->mm, + r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_TTM_EVICTION); } while (!r && prange->actual_loc && --retries); @@ -3328,6 +3324,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) } spin_unlock(&svm_bo->list_lock); mmap_read_unlock(mm); + mmput(mm); dma_fence_signal(&svm_bo->eviction_fence->base); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 9156b041ef175..cfac13ad06ef0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -46,7 +46,6 @@ struct svm_range_bo { spinlock_t list_lock; struct amdgpu_amdkfd_fence *eviction_fence; struct work_struct eviction_work; - struct svm_range_list *svms; uint32_t evicting; struct work_struct release_work; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 779d3f2374664..f396b28327808 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1494,8 +1494,8 @@ static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev, static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node) { + struct kfd_iolink_properties *gpu_link, *tmp_link, *cpu_link; struct kfd_iolink_properties *props = NULL, *props2 = NULL; 
- struct kfd_iolink_properties *gpu_link, *cpu_link; struct kfd_topology_device *cpu_dev; int ret = 0; int i, num_cpu; @@ -1518,16 +1518,19 @@ static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int g continue; /* find CPU <--> CPU links */ + cpu_link = NULL; cpu_dev = kfd_topology_device_by_proximity_domain(i); if (cpu_dev) { - list_for_each_entry(cpu_link, + list_for_each_entry(tmp_link, &cpu_dev->io_link_props, list) { - if (cpu_link->node_to == gpu_link->node_to) + if (tmp_link->node_to == gpu_link->node_to) { + cpu_link = tmp_link; break; + } } } - if (cpu_link->node_to != gpu_link->node_to) + if (!cpu_link) return -ENOMEM; /* CPU <--> CPU <--> GPU, GPU node*/ diff --git a/drivers/gpu/drm/amd/backport/backport.h b/drivers/gpu/drm/amd/backport/backport.h index 1992b972a1bdd..b89f0fe0664ee 100644 --- a/drivers/gpu/drm/amd/backport/backport.h +++ b/drivers/gpu/drm/amd/backport/backport.h @@ -105,5 +105,6 @@ #include #include #include +#include #endif /* AMDGPU_BACKPORT_H */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 24bd6b469a00e..191b1f0fd3452 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -818,14 +818,18 @@ bool dm_helpers_dp_write_dsc_enable( const struct dc_stream_state *stream, bool enable) { +#if defined(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX) static const uint8_t DSC_DISABLE; static const uint8_t DSC_DECODING = 0x01; static const uint8_t DSC_PASSTHROUGH = 0x02; - struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_port *port; uint8_t enable_dsc = enable ? DSC_DECODING : DSC_DISABLE; uint8_t enable_passthrough = enable ? DSC_PASSTHROUGH : DSC_DISABLE; +#else + uint8_t enable_dsc = enable ? 1 : 0; +#endif + struct amdgpu_dm_connector *aconnector; uint8_t ret = 0; if (!stream) @@ -844,7 +848,7 @@ bool dm_helpers_dp_write_dsc_enable( return write_dsc_enable_synaptics_non_virtual_dpcd_mst( aconnector->dsc_aux, stream, enable_dsc); #endif - +#if defined(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX) port = aconnector->port; if (enable) { @@ -878,6 +882,11 @@ bool dm_helpers_dp_write_dsc_enable( ret); } } +#else + ret = drm_dp_dpcd_write(aconnector->dsc_aux, DP_DSC_ENABLE, &enable_dsc, 1); + DC_LOG_DC("Send DSC %s to MST RX\n", enable_dsc ? 
"enable" : "disable"); +#endif + } if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT || stream->signal == SIGNAL_TYPE_EDP) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 1b9ccce0a78e4..c706b2d08b956 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1568,8 +1568,6 @@ bool pre_validate_dsc(struct drm_atomic_state *state, return (ret == 0); } -#endif - static unsigned int kbps_from_pbn(unsigned int pbn) { unsigned int kbps = pbn; @@ -1597,17 +1595,20 @@ static bool is_dsc_common_config_possible(struct dc_stream_state *stream, return bw_range->max_target_bpp_x16 && bw_range->min_target_bpp_x16; } +#endif /* CONFIG_DRM_AMD_DC_DCN */ enum dc_status dm_dp_mst_is_port_support_mode( struct amdgpu_dm_connector *aconnector, struct dc_stream_state *stream) { + int bpp, pbn, branch_max_throughput_mps = 0; +#if defined(CONFIG_DRM_AMD_DC_DCN) +#ifdef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX struct dc_link_settings cur_link_settings; unsigned int end_to_end_bw_in_kbps = 0; unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; unsigned int max_compressed_bw_in_kbps = 0; struct dc_dsc_bw_range bw_range = {0}; - int bpp, pbn, branch_max_throughput_mps = 0; /* * check if the mode could be supported if DSC pass-through is supported @@ -1642,13 +1643,19 @@ enum dc_status dm_dp_mst_is_port_support_mode( return DC_FAIL_BANDWIDTH_VALIDATE; } } else { +#endif +#endif /* check if mode could be supported within full_pbn */ bpp = convert_dc_color_depth_into_bpc(stream->timing.display_color_depth) * 3; pbn = drm_dp_calc_pbn_mode(stream->timing.pix_clk_100hz / 10, bpp, false); if (pbn > aconnector->port->full_pbn) return DC_FAIL_BANDWIDTH_VALIDATE; +#if defined(CONFIG_DRM_AMD_DC_DCN) +#ifdef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX } +#endif +#endif /* check is mst dsc output bandwidth branch_overall_throughput_0_mps */ switch (stream->timing.pixel_encoding) { diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c index 6767fab55c260..352e9afb85c6d 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c +++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c @@ -100,3 +100,24 @@ void convert_float_matrix( matrix[i] = (uint16_t)reg_value; } } + +static uint32_t find_gcd(uint32_t a, uint32_t b) +{ + uint32_t remainder = 0; + while (b != 0) { + remainder = a % b; + a = b; + b = remainder; + } + return a; +} + +void reduce_fraction(uint32_t num, uint32_t den, + uint32_t *out_num, uint32_t *out_den) +{ + uint32_t gcd = 0; + + gcd = find_gcd(num, den); + *out_num = num / gcd; + *out_den = den / gcd; +} diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.h b/drivers/gpu/drm/amd/display/dc/basics/conversion.h index ade785c4fdc7d..81da4e6f7a1ac 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/conversion.h +++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.h @@ -38,6 +38,9 @@ void convert_float_matrix( struct fixed31_32 *flt, uint32_t buffer_size); +void reduce_fraction(uint32_t num, uint32_t den, + uint32_t *out_num, uint32_t *out_den); + static inline unsigned int log_2(unsigned int num) { return ilog2(num); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 7af19823a29db..beb025cd3dc29 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c 
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -415,7 +415,7 @@ static struct wm_table lpddr5_wm_table = { } }; -static DpmClocks_t dummy_clocks; +static DpmClocks314_t dummy_clocks; static struct dcn314_watermarks dummy_wms = { 0 }; @@ -500,7 +500,7 @@ static void dcn314_notify_wm_ranges(struct clk_mgr *clk_mgr_base) static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, struct dcn314_smu_dpm_clks *smu_dpm_clks) { - DpmClocks_t *table = smu_dpm_clks->dpm_clks; + DpmClocks314_t *table = smu_dpm_clks->dpm_clks; if (!clk_mgr->smu_ver) return; @@ -517,6 +517,26 @@ static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, dcn314_smu_transfer_dpm_table_smu_2_dram(clk_mgr); } +static inline bool is_valid_clock_value(uint32_t clock_value) +{ + return clock_value > 1 && clock_value < 100000; +} + +static unsigned int convert_wck_ratio(uint8_t wck_ratio) +{ + switch (wck_ratio) { + case WCK_RATIO_1_2: + return 2; + + case WCK_RATIO_1_4: + return 4; + + default: + break; + } + return 1; +} + static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks) { uint32_t max = 0; @@ -530,89 +550,127 @@ static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks) return max; } -static unsigned int find_clk_for_voltage( - const DpmClocks_t *clock_table, - const uint32_t clocks[], - unsigned int voltage) -{ - int i; - int max_voltage = 0; - int clock = 0; - - for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) { - if (clock_table->SocVoltage[i] == voltage) { - return clocks[i]; - } else if (clock_table->SocVoltage[i] >= max_voltage && - clock_table->SocVoltage[i] < voltage) { - max_voltage = clock_table->SocVoltage[i]; - clock = clocks[i]; - } - } - - ASSERT(clock); - return clock; -} - static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr, struct integrated_info *bios_info, - const DpmClocks_t *clock_table) + const DpmClocks314_t *clock_table) { - int i, j; struct clk_bw_params *bw_params = clk_mgr->base.bw_params; - uint32_t max_dispclk = 0, max_dppclk = 0; - - j = -1; - - ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL); - - /* Find lowest DPM, FCLK is filled in reverse order*/ + struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; + uint32_t max_pstate = 0, max_fclk = 0, min_pstate = 0, max_dispclk = 0, max_dppclk = 0; + int i; - for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) { - if (clock_table->DfPstateTable[i].FClk != 0) { - j = i; - break; + /* Find highest valid fclk pstate */ + for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) { + if (is_valid_clock_value(clock_table->DfPstateTable[i].FClk) && + clock_table->DfPstateTable[i].FClk > max_fclk) { + max_fclk = clock_table->DfPstateTable[i].FClk; + max_pstate = i; } } - if (j == -1) { - /* clock table is all 0s, just use our own hardcode */ - ASSERT(0); - return; - } - - bw_params->clk_table.num_entries = j + 1; + /* We expect the table to contain at least one valid fclk entry. 
*/ + ASSERT(is_valid_clock_value(max_fclk)); - /* dispclk and dppclk can be max at any voltage, same number of levels for both */ + /* Dispclk and dppclk can be max at any voltage, same number of levels for both */ if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) { max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); } else { + /* Invalid number of entries in the table from PMFW. */ ASSERT(0); } - for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) { - bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk; - bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk; - bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage; - switch (clock_table->DfPstateTable[j].WckRatio) { - case WCK_RATIO_1_2: - bw_params->clk_table.entries[i].wck_ratio = 2; - break; - case WCK_RATIO_1_4: - bw_params->clk_table.entries[i].wck_ratio = 4; - break; - default: - bw_params->clk_table.entries[i].wck_ratio = 1; + /* Base the clock table on dcfclk, need at least one entry regardless of pmfw table */ + for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + uint32_t min_fclk = clock_table->DfPstateTable[0].FClk; + int j; + + for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) { + if (is_valid_clock_value(clock_table->DfPstateTable[j].FClk) && + clock_table->DfPstateTable[j].FClk < min_fclk && + clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]) { + min_fclk = clock_table->DfPstateTable[j].FClk; + min_pstate = j; + } } - bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage); - bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage); + + /* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */ + for (j = bw_params->clk_table.num_entries - 1; j > 0; j--) + if (bw_params->clk_table.entries[j].dcfclk_mhz <= clock_table->DcfClocks[i]) + break; + + bw_params->clk_table.entries[i].phyclk_mhz = bw_params->clk_table.entries[j].phyclk_mhz; + bw_params->clk_table.entries[i].phyclk_d18_mhz = bw_params->clk_table.entries[j].phyclk_d18_mhz; + bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz; + + /* Now update clocks we do read */ + bw_params->clk_table.entries[i].fclk_mhz = min_fclk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i]; + bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i]; + bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; + bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; + bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio( + clock_table->DfPstateTable[min_pstate].WckRatio); + }; + + /* Make sure to include at least one entry at highest pstate */ + if (max_pstate != min_pstate || i == 0) { + if (i > MAX_NUM_DPM_LVL - 1) + i = MAX_NUM_DPM_LVL - 1; + + bw_params->clk_table.entries[i].fclk_mhz = max_fclk; + bw_params->clk_table.entries[i].memclk_mhz = 
clock_table->DfPstateTable[max_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; + bw_params->clk_table.entries[i].wck_ratio = convert_wck_ratio( + clock_table->DfPstateTable[max_pstate].WckRatio); + i++; } + bw_params->clk_table.num_entries = i--; + + /* Make sure all highest clocks are included*/ + bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS); + bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS); + ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS)); + bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz; + bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz; + bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; + /* + * Set any 0 clocks to max default setting. Not an issue for + * power since we aren't doing switching in such case anyway + */ + for (i = 0; i < bw_params->clk_table.num_entries; i++) { + if (!bw_params->clk_table.entries[i].fclk_mhz) { + bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz; + bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz; + bw_params->clk_table.entries[i].voltage = def_max.voltage; + } + if (!bw_params->clk_table.entries[i].dcfclk_mhz) + bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz) + bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz; + if (!bw_params->clk_table.entries[i].dispclk_mhz) + bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz; + if (!bw_params->clk_table.entries[i].dppclk_mhz) + bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz; + if (!bw_params->clk_table.entries[i].phyclk_mhz) + bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz; + if (!bw_params->clk_table.entries[i].phyclk_d18_mhz) + bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz; + if (!bw_params->clk_table.entries[i].dtbclk_mhz) + bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz; + } + ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz); bw_params->vram_type = bios_info->memory_type; - bw_params->num_channels = bios_info->ma_channel_number; + bw_params->num_channels = bios_info->ma_channel_number ? 
bios_info->ma_channel_number : 4; for (i = 0; i < WM_SET_COUNT; i++) { bw_params->wm_table.entries[i].wm_inst = i; @@ -671,10 +729,10 @@ void dcn314_clk_mgr_construct( } ASSERT(clk_mgr->smu_wm_set.wm_set); - smu_dpm_clks.dpm_clks = (DpmClocks_t *)dm_helpers_allocate_gpu_mem( + smu_dpm_clks.dpm_clks = (DpmClocks314_t *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER, - sizeof(DpmClocks_t), + sizeof(DpmClocks314_t), &smu_dpm_clks.mc_address.quad_part); if (smu_dpm_clks.dpm_clks == NULL) { @@ -719,7 +777,7 @@ void dcn314_clk_mgr_construct( if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { dcn314_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks); - if (ctx->dc_bios && ctx->dc_bios->integrated_info) { + if (ctx->dc_bios && ctx->dc_bios->integrated_info && ctx->dc->config.use_default_clock_table == false) { dcn314_clk_mgr_helper_populate_bw_params( &clk_mgr->base, ctx->dc_bios->integrated_info, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h index a7958dc965810..047d19ea919c7 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.h @@ -36,6 +36,37 @@ typedef enum { WCK_RATIO_MAX } WCK_RATIO_e; +typedef struct { + uint32_t FClk; + uint32_t MemClk; + uint32_t Voltage; + uint8_t WckRatio; + uint8_t Spare[3]; +} DfPstateTable314_t; + +//Freq in MHz +//Voltage in milli volts with 2 fractional bits +typedef struct { + uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS]; + uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS]; + uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS]; + uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS]; + uint32_t VClocks[NUM_VCN_DPM_LEVELS]; + uint32_t DClocks[NUM_VCN_DPM_LEVELS]; + uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS]; + DfPstateTable314_t DfPstateTable[NUM_DF_PSTATE_LEVELS]; + + uint8_t NumDcfClkLevelsEnabled; + uint8_t NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk + uint8_t NumSocClkLevelsEnabled; + uint8_t VcnClkLevelsEnabled; //Applies to both Vclk and Dclk + uint8_t NumDfPstatesEnabled; + uint8_t spare[3]; + + uint32_t MinGfxClk; + uint32_t MaxGfxClk; +} DpmClocks314_t; + struct dcn314_watermarks { // Watermarks WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES]; @@ -43,7 +74,7 @@ struct dcn314_watermarks { }; struct dcn314_smu_dpm_clks { - DpmClocks_t *dpm_clks; + DpmClocks314_t *dpm_clks; union large_integer mc_address; }; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index cc076621f5e66..bff0f57e7fe61 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -507,7 +507,7 @@ static void dcn315_clk_mgr_helper_populate_bw_params( bw_params->clk_table.entries[i].dispclk_mhz = clock_table->DispClocks[i]; bw_params->clk_table.entries[i].dppclk_mhz = clock_table->DppClocks[i]; bw_params->clk_table.entries[i].wck_ratio = 1; - }; + } /* Make sure to include at least one entry and highest pstate */ if (max_pstate != min_pstate || i == 0) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index cf99097887076..719ba38a10b8a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1081,8 +1081,15 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) struct dc_stream_state 
*old_stream = dc->current_state->res_ctx.pipe_ctx[i].stream; bool should_disable = true; - bool pipe_split_change = - context->res_ctx.pipe_ctx[i].top_pipe != dc->current_state->res_ctx.pipe_ctx[i].top_pipe; + bool pipe_split_change = false; + + if ((context->res_ctx.pipe_ctx[i].top_pipe) && + (dc->current_state->res_ctx.pipe_ctx[i].top_pipe)) + pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe->pipe_idx != + dc->current_state->res_ctx.pipe_ctx[i].top_pipe->pipe_idx; + else + pipe_split_change = context->res_ctx.pipe_ctx[i].top_pipe != + dc->current_state->res_ctx.pipe_ctx[i].top_pipe; for (j = 0; j < context->stream_count; j++) { if (old_stream == context->streams[j]) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6621f608b5a98..14361bc91ccb1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.197" +#define DC_VER "3.2.198" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -404,6 +404,7 @@ struct dc_config { bool use_pipe_ctx_sync_logic; bool ignore_dpref_ss; bool enable_mipi_converter_optimization; + bool use_default_clock_table; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index c8059c28ac494..09b304507badb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -29,6 +29,7 @@ #include "dm_helpers.h" #include "dc_hw_types.h" #include "core_types.h" +#include "../basics/conversion.h" #define CTX dc_dmub_srv->ctx #define DC_LOGGER CTX->logger @@ -600,6 +601,7 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, &cmd->fw_assisted_mclk_switch_v2.config_data.pipe_data[cmd_pipe_index]; struct dc_crtc_timing *main_timing = &subvp_pipe->stream->timing; struct dc_crtc_timing *phantom_timing = &subvp_pipe->stream->mall_stream_config.paired_stream->timing; + uint32_t out_num, out_den; pipe_data->mode = SUBVP; pipe_data->pipe_config.subvp_data.pix_clk_100hz = subvp_pipe->stream->timing.pix_clk_100hz; @@ -613,6 +615,15 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc, pipe_data->pipe_config.subvp_data.main_pipe_index = subvp_pipe->pipe_idx; pipe_data->pipe_config.subvp_data.is_drr = subvp_pipe->stream->ignore_msa_timing_param; + /* Calculate the scaling factor from the src and dst height. + * e.g. If 3840x2160 being downscaled to 1920x1080, the scaling factor is 1/2. 
+ * Reduce the fraction 1080/2160 = 1/2 for the "scaling factor" + */ + reduce_fraction(subvp_pipe->stream->src.height, subvp_pipe->stream->dst.height, &out_num, &out_den); + // TODO: Uncomment below lines once DMCUB include headers are promoted + //pipe_data->pipe_config.subvp_data.scale_factor_numerator = out_num; + //pipe_data->pipe_config.subvp_data.scale_factor_denominator = out_den; + // Prefetch lines is equal to VACTIVE + BP + VSYNC pipe_data->pipe_config.subvp_data.prefetch_lines = phantom_timing->v_total - phantom_timing->v_front_porch; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index f8cf7e8d4bef8..49f2f46e0f652 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -543,9 +543,11 @@ static void dce112_get_pix_clk_dividers_helper ( switch (pix_clk_params->color_depth) { case COLOR_DEPTH_101010: actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2; + actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; break; case COLOR_DEPTH_121212: actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2; + actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; break; case COLOR_DEPTH_161616: actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index d4a6504dfe000..db7ca4b0cdb9d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -361,8 +361,6 @@ void dpp1_cnv_setup ( select = INPUT_CSC_SELECT_ICSC; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - pixel_format = 22; - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: pixel_format = 26; /* ARGB16161616_UNORM */ break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index b54c124003237..564e061ccb589 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -278,9 +278,6 @@ void hubp1_program_pixel_format( SURFACE_PIXEL_FORMAT, 10); break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - REG_UPDATE(DCSURF_SURFACE_CONFIG, - SURFACE_PIXEL_FORMAT, 22); - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/ REG_UPDATE(DCSURF_SURFACE_CONFIG, SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 0877ab143b98b..1466c156847ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -1086,7 +1086,7 @@ static void optc1_set_test_pattern( src_color[index] >> (src_bpc - dst_bpc); /* CRTC_TEST_PATTERN_DATA has 16 bits, * lowest 6 are hardwired to ZERO - * color bits should be left aligned aligned to MSB + * color bits should be left aligned to MSB * XXXXXXXXXX000000 for 10 bit, * XXXXXXXX00000000 for 8 bit and XXXXXX0000000000 for 6 */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c index ea1f14af0db75..eaa7032f0f1a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dpp.c @@ -166,8 +166,6 @@ static void dpp2_cnv_setup ( select = DCN2_ICSC_SELECT_ICSC_A; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - pixel_format = 22; - break; 
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: pixel_format = 26; /* ARGB16161616_UNORM */ break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 936af65381ef7..9570c2118ccc7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -463,9 +463,6 @@ void hubp2_program_pixel_format( SURFACE_PIXEL_FORMAT, 10); break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - REG_UPDATE(DCSURF_SURFACE_CONFIG, - SURFACE_PIXEL_FORMAT, 22); - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: /*we use crossbar already*/ REG_UPDATE(DCSURF_SURFACE_CONFIG, SURFACE_PIXEL_FORMAT, 26); /* ARGB16161616_UNORM */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c index c5e200d09038f..5752271f22dfe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c @@ -67,9 +67,15 @@ static uint32_t convert_and_clamp( void dcn21_dchvm_init(struct hubbub *hubbub) { struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); - uint32_t riommu_active; + uint32_t riommu_active, prefetch_done; int i; + REG_GET(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, &prefetch_done); + + if (prefetch_done) { + hubbub->riommu_active = true; + return; + } //Init DCHVM block REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index 77b00f86c2165..4a668d6563dfd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -244,8 +244,6 @@ void dpp3_cnv_setup ( select = INPUT_CSC_SELECT_ICSC; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - pixel_format = 22; - break; case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: pixel_format = 26; /* ARGB16161616_UNORM */ break; diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c index 6df6598d217dc..d4b488b1f9ffe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c @@ -186,14 +186,6 @@ void optc3_set_dsc_config(struct timing_generator *optc, } #endif -void optc3_set_vrr_m_const(struct timing_generator *optc, - double vtotal_avg) -{ - DC_FP_START(); - optc3_fpu_set_vrr_m_const(optc, vtotal_avg); - DC_FP_END(); -} - void optc3_set_odm_bypass(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h index 62f0ba7a3dd03..c305c69f2d134 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h @@ -329,9 +329,6 @@ void optc3_lock_doublebuffer_enable(struct timing_generator *optc); void optc3_lock_doublebuffer_disable(struct timing_generator *optc); -void optc3_set_vrr_m_const(struct timing_generator *optc, - double vtotal_avg); - void optc3_set_drr_trigger_window(struct timing_generator *optc, uint32_t window_start, uint32_t window_end); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index bb815654dbd22..6f1aef3d2bf85 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -332,7 +332,8 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc 
*dc, struct dc_state *c bool dcn32_apply_idle_power_optimizations(struct dc *dc, bool enable) { union dmub_rb_cmd cmd; - uint8_t ways, i, j; + uint8_t ways, i; + int j; bool stereo_in_use = false; struct dc_plane_state *plane = NULL; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index bd189802c7902..a222e56594e5c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -281,7 +281,6 @@ static struct timing_generator_funcs dcn32_tg_funcs = { .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable, .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable, .enable_optc_clock = optc1_enable_optc_clock, - .set_vrr_m_const = optc3_set_vrr_m_const, .set_drr = optc32_set_drr, .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal, .set_vtotal_min_max = optc3_set_vtotal_min_max, diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index fba95082b5c36..e59aad43ea0b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -63,7 +63,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat if (pipe->stream && pipe->plane_state && !pipe->top_pipe && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4; - mall_region_pixels = pipe->stream->timing.h_addressable * pipe->stream->timing.v_addressable; + mall_region_pixels = pipe->plane_state->plane_size.surface_pitch * pipe->stream->timing.v_addressable; // For bytes required in MALL, calculate based on number of MBlks required num_mblks = (mall_region_pixels * bytes_per_pixel + diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c index ef1eee2e54d9a..795d6fb0eaa97 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c @@ -1669,7 +1669,7 @@ static bool dcn321_resource_construct( dc->caps.subvp_prefetch_end_to_mall_start_us = 15; dc->caps.subvp_swath_height_margin_lines = 16; dc->caps.subvp_pstate_allow_width_us = 20; - + dc->caps.subvp_vertical_int_margin_us = 30; dc->caps.max_slave_planes = 1; dc->caps.max_slave_yuv_planes = 1; dc->caps.max_slave_rgb_planes = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c index 6ca288fb5fb9e..3aa8dd0acd5e8 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/bw_fixed.c @@ -25,12 +25,11 @@ #include "dm_services.h" #include "bw_fixed.h" +#define MAX_I64 \ + ((int64_t)((1ULL << 63) - 1)) #define MIN_I64 \ - (int64_t)(-(1LL << 63)) - -#define MAX_I64 \ - (int64_t)((1ULL << 63) - 1) + (-MAX_I64 - 1) #define FRACTIONAL_PART_MASK \ ((1ULL << BW_FIXED_BITS_PER_FRACTIONAL_PART) - 1) @@ -49,6 +48,7 @@ static uint64_t abs_i64(int64_t arg) struct bw_fixed bw_int_to_fixed_nonconst(int64_t value) { struct bw_fixed res; + ASSERT(value < BW_FIXED_MAX_I32 && value > BW_FIXED_MIN_I32); res.value = value << BW_FIXED_BITS_PER_FRACTIONAL_PART; return res; @@ -78,14 +78,12 @@ struct bw_fixed bw_frc_to_fixed(int64_t numerator, int64_t denominator) { uint32_t i = BW_FIXED_BITS_PER_FRACTIONAL_PART; - do - { + do { remainder <<= 
1; res_value <<= 1; - if (remainder >= arg2_value) - { + if (remainder >= arg2_value) { res_value |= 1; remainder -= arg2_value; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c index 3fab19134480d..d63b4209b14c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c @@ -26,7 +26,7 @@ #include "dc.h" #include "dc_link.h" #include "../display_mode_lib.h" -#include "dml/dcn30/display_mode_vba_30.h" +#include "../dcn30/display_mode_vba_30.h" #include "display_mode_vba_31.h" #include "../dml_inline_defs.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index 66b82e4f05c6e..35d10b4d018bf 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -27,7 +27,7 @@ #include "../display_mode_vba.h" #include "../dml_inline_defs.h" #include "display_rq_dlg_calc_31.h" -#include "dml/dcn30/display_mode_vba_30.h" +#include "../dcn30/display_mode_vba_30.h" static bool is_dual_plane(enum source_format_class source_format) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c index c80307a6af1bf..34a5d0f87b5f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c @@ -189,7 +189,7 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p dc_assert_fp_enabled(); // Default clock levels are used for diags, which may lead to overclocking. - if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + if (!IS_DIAG_DC(dc->ctx->dce_environment) && dc->config.use_default_clock_table == false) { dcn3_14_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; dcn3_14_ip.max_num_dpp = dc->res_pool->pipe_count; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index a51e74344698e..0813f4fdb68c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -493,6 +493,7 @@ void dcn32_set_phantom_stream_timing(struct dc *dc, phantom_stream->timing.v_front_porch + phantom_stream->timing.v_sync_width + phantom_bp; + phantom_stream->timing.flags.DSC = 0; // Don't need DSC for phantom timing } /** diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c deleted file mode 100644 index b4b51e51fc25c..0000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c +++ /dev/null @@ -1,1884 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "resource.h" -#include "core_types.h" -#include "dsc.h" -#include "clk_mgr.h" - -#ifndef DC_LOGGER_INIT -#define DC_LOGGER_INIT -#undef DC_LOG_WARNING -#define DC_LOG_WARNING -#endif - -#define DML_WRAPPER_TRANSLATION_ -#include "dml_wrapper_translation.c" -#undef DML_WRAPPER_TRANSLATION_ - -static bool is_dual_plane(enum surface_pixel_format format) -{ - return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA; -} - -static void build_clamping_params(struct dc_stream_state *stream) -{ - stream->clamping.clamping_level = CLAMPING_FULL_RANGE; - stream->clamping.c_depth = stream->timing.display_color_depth; - stream->clamping.pixel_encoding = stream->timing.pixel_encoding; -} - -static void get_pixel_clock_parameters( - const struct pipe_ctx *pipe_ctx, - struct pixel_clk_params *pixel_clk_params) -{ - const struct dc_stream_state *stream = pipe_ctx->stream; - - /*TODO: is this halved for YCbCr 420? in that case we might want to move - * the pixel clock normalization for hdmi up to here instead of doing it - * in pll_adjust_pix_clk - */ - pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz; - pixel_clk_params->encoder_object_id = stream->link->link_enc->id; - pixel_clk_params->signal_type = pipe_ctx->stream->signal; - pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1; - /* TODO: un-hardcode*/ - pixel_clk_params->requested_sym_clk = LINK_RATE_LOW * - LINK_RATE_REF_FREQ_IN_KHZ; - pixel_clk_params->flags.ENABLE_SS = 0; - pixel_clk_params->color_depth = - stream->timing.display_color_depth; - pixel_clk_params->flags.DISPLAY_BLANKED = 1; - pixel_clk_params->flags.SUPPORT_YCBCR420 = (stream->timing.pixel_encoding == - PIXEL_ENCODING_YCBCR420); - pixel_clk_params->pixel_encoding = stream->timing.pixel_encoding; - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR422) { - pixel_clk_params->color_depth = COLOR_DEPTH_888; - } - if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { - pixel_clk_params->requested_pix_clk_100hz = pixel_clk_params->requested_pix_clk_100hz / 2; - } - if (stream->timing.timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - pixel_clk_params->requested_pix_clk_100hz *= 2; - -} - -static void resource_build_bit_depth_reduction_params(struct dc_stream_state *stream, - struct bit_depth_reduction_params *fmt_bit_depth) -{ - enum dc_dither_option option = stream->dither_option; - enum dc_pixel_encoding pixel_encoding = - stream->timing.pixel_encoding; - - memset(fmt_bit_depth, 0, sizeof(*fmt_bit_depth)); - - if (option == DITHER_OPTION_DEFAULT) { - switch (stream->timing.display_color_depth) { - case COLOR_DEPTH_666: - option = DITHER_OPTION_SPATIAL6; - break; - case COLOR_DEPTH_888: - option = DITHER_OPTION_SPATIAL8; - break; - case COLOR_DEPTH_101010: - option = DITHER_OPTION_SPATIAL10; - break; - default: - option = DITHER_OPTION_DISABLE; - } - } - - if (option == DITHER_OPTION_DISABLE) - return; - - if (option == DITHER_OPTION_TRUN6) { - 
fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 0; - } else if (option == DITHER_OPTION_TRUN8 || - option == DITHER_OPTION_TRUN8_SPATIAL6 || - option == DITHER_OPTION_TRUN8_FM6) { - fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 1; - } else if (option == DITHER_OPTION_TRUN10 || - option == DITHER_OPTION_TRUN10_SPATIAL6 || - option == DITHER_OPTION_TRUN10_SPATIAL8 || - option == DITHER_OPTION_TRUN10_FM8 || - option == DITHER_OPTION_TRUN10_FM6 || - option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { - fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; - } - - /* special case - Formatter can only reduce by 4 bits at most. - * When reducing from 12 to 6 bits, - * HW recommends we use trunc with round mode - * (if we did nothing, trunc to 10 bits would be used) - * note that any 12->10 bit reduction is ignored prior to DCE8, - * as the input was 10 bits. - */ - if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL6 || - option == DITHER_OPTION_FM6) { - fmt_bit_depth->flags.TRUNCATE_ENABLED = 1; - fmt_bit_depth->flags.TRUNCATE_DEPTH = 2; - fmt_bit_depth->flags.TRUNCATE_MODE = 1; - } - - /* spatial dither - * note that spatial modes 1-3 are never used - */ - if (option == DITHER_OPTION_SPATIAL6_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL6 || - option == DITHER_OPTION_TRUN10_SPATIAL6 || - option == DITHER_OPTION_TRUN8_SPATIAL6) { - fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; - fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 0; - fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; - fmt_bit_depth->flags.RGB_RANDOM = - (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; - } else if (option == DITHER_OPTION_SPATIAL8_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL8 || - option == DITHER_OPTION_SPATIAL8_FM6 || - option == DITHER_OPTION_TRUN10_SPATIAL8 || - option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { - fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; - fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 1; - fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; - fmt_bit_depth->flags.RGB_RANDOM = - (pixel_encoding == PIXEL_ENCODING_RGB) ? 1 : 0; - } else if (option == DITHER_OPTION_SPATIAL10_FRAME_RANDOM || - option == DITHER_OPTION_SPATIAL10 || - option == DITHER_OPTION_SPATIAL10_FM8 || - option == DITHER_OPTION_SPATIAL10_FM6) { - fmt_bit_depth->flags.SPATIAL_DITHER_ENABLED = 1; - fmt_bit_depth->flags.SPATIAL_DITHER_DEPTH = 2; - fmt_bit_depth->flags.HIGHPASS_RANDOM = 1; - fmt_bit_depth->flags.RGB_RANDOM = - (pixel_encoding == PIXEL_ENCODING_RGB) ? 
1 : 0; - } - - if (option == DITHER_OPTION_SPATIAL6 || - option == DITHER_OPTION_SPATIAL8 || - option == DITHER_OPTION_SPATIAL10) { - fmt_bit_depth->flags.FRAME_RANDOM = 0; - } else { - fmt_bit_depth->flags.FRAME_RANDOM = 1; - } - - ////////////////////// - //// temporal dither - ////////////////////// - if (option == DITHER_OPTION_FM6 || - option == DITHER_OPTION_SPATIAL8_FM6 || - option == DITHER_OPTION_SPATIAL10_FM6 || - option == DITHER_OPTION_TRUN10_FM6 || - option == DITHER_OPTION_TRUN8_FM6 || - option == DITHER_OPTION_TRUN10_SPATIAL8_FM6) { - fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; - fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 0; - } else if (option == DITHER_OPTION_FM8 || - option == DITHER_OPTION_SPATIAL10_FM8 || - option == DITHER_OPTION_TRUN10_FM8) { - fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; - fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 1; - } else if (option == DITHER_OPTION_FM10) { - fmt_bit_depth->flags.FRAME_MODULATION_ENABLED = 1; - fmt_bit_depth->flags.FRAME_MODULATION_DEPTH = 2; - } - - fmt_bit_depth->pixel_encoding = pixel_encoding; -} - -/* Move this after the above function as VS complains about - * declaration issues for resource_build_bit_depth_reduction_params. - */ - -static enum dc_status build_pipe_hw_param(struct pipe_ctx *pipe_ctx) -{ - - get_pixel_clock_parameters(pipe_ctx, &pipe_ctx->stream_res.pix_clk_params); - - if (pipe_ctx->clock_source) - pipe_ctx->clock_source->funcs->get_pix_clk_dividers( - pipe_ctx->clock_source, - &pipe_ctx->stream_res.pix_clk_params, - &pipe_ctx->pll_settings); - - pipe_ctx->stream->clamping.pixel_encoding = pipe_ctx->stream->timing.pixel_encoding; - - resource_build_bit_depth_reduction_params(pipe_ctx->stream, - &pipe_ctx->stream->bit_depth_params); - build_clamping_params(pipe_ctx->stream); - - return DC_OK; -} - -bool dml_validate_dsc(struct dc *dc, struct dc_state *new_ctx) -{ - int i; - - /* Validate DSC config, dsc count validation is already done */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &new_ctx->res_ctx.pipe_ctx[i]; - struct dc_stream_state *stream = pipe_ctx->stream; - struct dsc_config dsc_cfg; - struct pipe_ctx *odm_pipe; - int opp_cnt = 1; - - for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) - opp_cnt++; - - /* Only need to validate top pipe */ - if (pipe_ctx->top_pipe || pipe_ctx->prev_odm_pipe || !stream || !stream->timing.flags.DSC) - continue; - - dsc_cfg.pic_width = (stream->timing.h_addressable + stream->timing.h_border_left - + stream->timing.h_border_right) / opp_cnt; - dsc_cfg.pic_height = stream->timing.v_addressable + stream->timing.v_border_top - + stream->timing.v_border_bottom; - dsc_cfg.pixel_encoding = stream->timing.pixel_encoding; - dsc_cfg.color_depth = stream->timing.display_color_depth; - dsc_cfg.is_odm = pipe_ctx->next_odm_pipe ? 
true : false; - dsc_cfg.dc_dsc_cfg = stream->timing.dsc_cfg; - dsc_cfg.dc_dsc_cfg.num_slices_h /= opp_cnt; - - if (pipe_ctx->stream_res.dsc && !pipe_ctx->stream_res.dsc->funcs->dsc_validate_stream(pipe_ctx->stream_res.dsc, &dsc_cfg)) - return false; - } - return true; -} - -enum dc_status dml_build_mapped_resource(const struct dc *dc, struct dc_state *context, struct dc_stream_state *stream) -{ - enum dc_status status = DC_OK; - struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream); - - if (!pipe_ctx) - return DC_ERROR_UNEXPECTED; - - - status = build_pipe_hw_param(pipe_ctx); - - return status; -} - -void dml_acquire_dsc(const struct dc *dc, - struct resource_context *res_ctx, - struct display_stream_compressor **dsc, - int pipe_idx) -{ - int i; - const struct resource_pool *pool = dc->res_pool; - struct display_stream_compressor *dsc_old = dc->current_state->res_ctx.pipe_ctx[pipe_idx].stream_res.dsc; - - ASSERT(*dsc == NULL); /* If this ASSERT fails, dsc was not released properly */ - *dsc = NULL; - - /* Always do 1-to-1 mapping when number of DSCs is same as number of pipes */ - if (pool->res_cap->num_dsc == pool->res_cap->num_opp) { - *dsc = pool->dscs[pipe_idx]; - res_ctx->is_dsc_acquired[pipe_idx] = true; - return; - } - - /* Return old DSC to avoid the need for redo it */ - if (dsc_old && !res_ctx->is_dsc_acquired[dsc_old->inst]) { - *dsc = dsc_old; - res_ctx->is_dsc_acquired[dsc_old->inst] = true; - return ; - } - - /* Find first free DSC */ - for (i = 0; i < pool->res_cap->num_dsc; i++) - if (!res_ctx->is_dsc_acquired[i]) { - *dsc = pool->dscs[i]; - res_ctx->is_dsc_acquired[i] = true; - break; - } -} - -static bool dml_split_stream_for_mpc_or_odm( - const struct dc *dc, - struct resource_context *res_ctx, - struct pipe_ctx *pri_pipe, - struct pipe_ctx *sec_pipe, - bool odm) -{ - int pipe_idx = sec_pipe->pipe_idx; - const struct resource_pool *pool = dc->res_pool; - - *sec_pipe = *pri_pipe; - - sec_pipe->pipe_idx = pipe_idx; - sec_pipe->plane_res.mi = pool->mis[pipe_idx]; - sec_pipe->plane_res.hubp = pool->hubps[pipe_idx]; - sec_pipe->plane_res.ipp = pool->ipps[pipe_idx]; - sec_pipe->plane_res.xfm = pool->transforms[pipe_idx]; - sec_pipe->plane_res.dpp = pool->dpps[pipe_idx]; - sec_pipe->plane_res.mpcc_inst = pool->dpps[pipe_idx]->inst; - sec_pipe->stream_res.dsc = NULL; - if (odm) { - if (pri_pipe->next_odm_pipe) { - ASSERT(pri_pipe->next_odm_pipe != sec_pipe); - sec_pipe->next_odm_pipe = pri_pipe->next_odm_pipe; - sec_pipe->next_odm_pipe->prev_odm_pipe = sec_pipe; - } - if (pri_pipe->top_pipe && pri_pipe->top_pipe->next_odm_pipe) { - pri_pipe->top_pipe->next_odm_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe->top_pipe->next_odm_pipe; - } - if (pri_pipe->bottom_pipe && pri_pipe->bottom_pipe->next_odm_pipe) { - pri_pipe->bottom_pipe->next_odm_pipe->top_pipe = sec_pipe; - sec_pipe->bottom_pipe = pri_pipe->bottom_pipe->next_odm_pipe; - } - pri_pipe->next_odm_pipe = sec_pipe; - sec_pipe->prev_odm_pipe = pri_pipe; - ASSERT(sec_pipe->top_pipe == NULL); - - if (!sec_pipe->top_pipe) - sec_pipe->stream_res.opp = pool->opps[pipe_idx]; - else - sec_pipe->stream_res.opp = sec_pipe->top_pipe->stream_res.opp; - if (sec_pipe->stream->timing.flags.DSC == 1) { - dml_acquire_dsc(dc, res_ctx, &sec_pipe->stream_res.dsc, pipe_idx); - ASSERT(sec_pipe->stream_res.dsc); - if (sec_pipe->stream_res.dsc == NULL) - return false; - } - } else { - if (pri_pipe->bottom_pipe) { - ASSERT(pri_pipe->bottom_pipe != sec_pipe); - sec_pipe->bottom_pipe = 
pri_pipe->bottom_pipe; - sec_pipe->bottom_pipe->top_pipe = sec_pipe; - } - pri_pipe->bottom_pipe = sec_pipe; - sec_pipe->top_pipe = pri_pipe; - - ASSERT(pri_pipe->plane_state); - } - - return true; -} - -static struct pipe_ctx *dml_find_split_pipe( - struct dc *dc, - struct dc_state *context, - int old_index) -{ - struct pipe_ctx *pipe = NULL; - int i; - - if (old_index >= 0 && context->res_ctx.pipe_ctx[old_index].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[old_index]; - pipe->pipe_idx = old_index; - } - - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (dc->current_state->res_ctx.pipe_ctx[i].top_pipe == NULL - && dc->current_state->res_ctx.pipe_ctx[i].prev_odm_pipe == NULL) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - } - - /* - * May need to fix pipes getting tossed from 1 opp to another on flip - * Add for debugging transient underflow during topology updates: - * ASSERT(pipe); - */ - if (!pipe) - for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { - if (context->res_ctx.pipe_ctx[i].stream == NULL) { - pipe = &context->res_ctx.pipe_ctx[i]; - pipe->pipe_idx = i; - break; - } - } - - return pipe; -} - -static void dml_release_dsc(struct resource_context *res_ctx, - const struct resource_pool *pool, - struct display_stream_compressor **dsc) -{ - int i; - - for (i = 0; i < pool->res_cap->num_dsc; i++) - if (pool->dscs[i] == *dsc) { - res_ctx->is_dsc_acquired[i] = false; - *dsc = NULL; - break; - } -} - -static int dml_get_num_mpc_splits(struct pipe_ctx *pipe) -{ - int mpc_split_count = 0; - struct pipe_ctx *other_pipe = pipe->bottom_pipe; - - while (other_pipe && other_pipe->plane_state == pipe->plane_state) { - mpc_split_count++; - other_pipe = other_pipe->bottom_pipe; - } - other_pipe = pipe->top_pipe; - while (other_pipe && other_pipe->plane_state == pipe->plane_state) { - mpc_split_count++; - other_pipe = other_pipe->top_pipe; - } - - return mpc_split_count; -} - -static bool dml_enough_pipes_for_subvp(struct dc *dc, - struct dc_state *context) -{ - int i = 0; - int num_pipes = 0; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->stream && pipe->plane_state) - num_pipes++; - } - - // Sub-VP only possible if the number of "real" pipes is - // less than or equal to half the number of available pipes - if (num_pipes * 2 > dc->res_pool->pipe_count) - return false; - - return true; -} - -static int dml_validate_apply_pipe_split_flags( - struct dc *dc, - struct dc_state *context, - int vlevel, - int *split, - bool *merge) -{ - int i, pipe_idx, vlevel_split; - int plane_count = 0; - bool force_split = false; - bool avoid_split = dc->debug.pipe_split_policy == MPC_SPLIT_AVOID; - struct vba_vars_st *v = &context->bw_ctx.dml.vba; - int max_mpc_comb = v->maxMpcComb; - - if (context->stream_count > 1) { - if (dc->debug.pipe_split_policy == MPC_SPLIT_AVOID_MULT_DISP) - avoid_split = true; - } else if (dc->debug.force_single_disp_pipe_split) - force_split = true; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - /** - * Workaround for avoiding pipe-split in cases where we'd split - * planes that are too small, resulting in splits that aren't - * valid for the scaler. 
- */ - if (pipe->plane_state && - (pipe->plane_state->dst_rect.width <= 16 || - pipe->plane_state->dst_rect.height <= 16 || - pipe->plane_state->src_rect.width <= 16 || - pipe->plane_state->src_rect.height <= 16)) - avoid_split = true; - - /* TODO: fix dc bugs and remove this split threshold thing */ - if (pipe->stream && !pipe->prev_odm_pipe && - (!pipe->top_pipe || pipe->top_pipe->plane_state != pipe->plane_state)) - ++plane_count; - } - if (plane_count > dc->res_pool->pipe_count / 2) - avoid_split = true; - - /* W/A: Mode timing with borders may not work well with pipe split, avoid for this corner case */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct dc_crtc_timing timing; - - if (!pipe->stream) - continue; - else { - timing = pipe->stream->timing; - if (timing.h_border_left + timing.h_border_right - + timing.v_border_top + timing.v_border_bottom > 0) { - avoid_split = true; - break; - } - } - } - - /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */ - if (avoid_split) { - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++) - if (v->NoOfDPP[vlevel][0][pipe_idx] == 1 && - v->ModeSupport[vlevel][0]) - break; - /* Impossible to not split this pipe */ - if (vlevel > context->bw_ctx.dml.soc.num_states) - vlevel = vlevel_split; - else - max_mpc_comb = 0; - pipe_idx++; - } - v->maxMpcComb = max_mpc_comb; - } - - /* Split loop sets which pipe should be split based on dml outputs and dc flags */ - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - int pipe_plane = v->pipe_plane[pipe_idx]; - bool split4mpc = context->stream_count == 1 && plane_count == 1 - && dc->config.enable_4to1MPC && dc->res_pool->pipe_count >= 4; - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - if (split4mpc || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 4) - split[i] = 4; - else if (force_split || v->NoOfDPP[vlevel][max_mpc_comb][pipe_plane] == 2) - split[i] = 2; - - if ((pipe->stream->view_format == - VIEW_3D_FORMAT_SIDE_BY_SIDE || - pipe->stream->view_format == - VIEW_3D_FORMAT_TOP_AND_BOTTOM) && - (pipe->stream->timing.timing_3d_format == - TIMING_3D_FORMAT_TOP_AND_BOTTOM || - pipe->stream->timing.timing_3d_format == - TIMING_3D_FORMAT_SIDE_BY_SIDE)) - split[i] = 2; - if (dc->debug.force_odm_combine & (1 << pipe->stream_res.tg->inst)) { - split[i] = 2; - v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_2to1; - } - if (dc->debug.force_odm_combine_4to1 & (1 << pipe->stream_res.tg->inst)) { - split[i] = 4; - v->ODMCombineEnablePerState[vlevel][pipe_plane] = dm_odm_combine_mode_4to1; - } - /*420 format workaround*/ - if (pipe->stream->timing.h_addressable > 7680 && - pipe->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420) { - split[i] = 4; - } - - v->ODMCombineEnabled[pipe_plane] = - v->ODMCombineEnablePerState[vlevel][pipe_plane]; - - if (v->ODMCombineEnabled[pipe_plane] == dm_odm_combine_mode_disabled) { - if (dml_get_num_mpc_splits(pipe) == 1) { - /*If need split for mpc but 2 way split already*/ - if (split[i] == 4) - split[i] = 2; /* 2 -> 4 MPC */ - else if (split[i] == 2) - split[i] = 0; /* 2 -> 2 MPC */ - else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) - merge[i] = true; /* 2 -> 1 MPC */ - } else if 
(dml_get_num_mpc_splits(pipe) == 3) { - /*If need split for mpc but 4 way split already*/ - if (split[i] == 2 && ((pipe->top_pipe && !pipe->top_pipe->top_pipe) - || !pipe->bottom_pipe)) { - merge[i] = true; /* 4 -> 2 MPC */ - } else if (split[i] == 0 && pipe->top_pipe && - pipe->top_pipe->plane_state == pipe->plane_state) - merge[i] = true; /* 4 -> 1 MPC */ - split[i] = 0; - } else if (dml_get_num_mpc_splits(pipe)) { - /* ODM -> MPC transition */ - if (pipe->prev_odm_pipe) { - split[i] = 0; - merge[i] = true; - } - } - } else { - if (dml_get_num_mpc_splits(pipe) == 1) { - /*If need split for odm but 2 way split already*/ - if (split[i] == 4) - split[i] = 2; /* 2 -> 4 ODM */ - else if (split[i] == 2) - split[i] = 0; /* 2 -> 2 ODM */ - else if (pipe->prev_odm_pipe) { - ASSERT(0); /* NOT expected yet */ - merge[i] = true; /* exit ODM */ - } - } else if (dml_get_num_mpc_splits(pipe) == 3) { - /*If need split for odm but 4 way split already*/ - if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe) - || !pipe->next_odm_pipe)) { - ASSERT(0); /* NOT expected yet */ - merge[i] = true; /* 4 -> 2 ODM */ - } else if (split[i] == 0 && pipe->prev_odm_pipe) { - ASSERT(0); /* NOT expected yet */ - merge[i] = true; /* exit ODM */ - } - split[i] = 0; - } else if (dml_get_num_mpc_splits(pipe)) { - /* MPC -> ODM transition */ - ASSERT(0); /* NOT expected yet */ - if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - split[i] = 0; - merge[i] = true; - } - } - } - - /* Adjust dppclk when split is forced, do not bother with dispclk */ - if (split[i] != 0 && v->NoOfDPP[vlevel][max_mpc_comb][pipe_idx] == 1) - v->RequiredDPPCLK[vlevel][max_mpc_comb][pipe_idx] /= 2; - pipe_idx++; - } - - return vlevel; -} - -static void dml_set_phantom_stream_timing(struct dc *dc, - struct dc_state *context, - struct pipe_ctx *ref_pipe, - struct dc_stream_state *phantom_stream) -{ - // phantom_vactive = blackout (latency + margin) + fw_processing_delays + pstate allow width - uint32_t phantom_vactive_us = context->bw_ctx.dml.soc.dram_clock_change_latency_us + 60 + - dc->caps.subvp_fw_processing_delay_us + - dc->caps.subvp_pstate_allow_width_us; - uint32_t phantom_vactive = ((double)phantom_vactive_us/1000000) * - (ref_pipe->stream->timing.pix_clk_100hz * 100) / - (double)ref_pipe->stream->timing.h_total; - uint32_t phantom_bp = ref_pipe->pipe_dlg_param.vstartup_start; - - phantom_stream->dst.y = 0; - phantom_stream->dst.height = phantom_vactive; - phantom_stream->src.y = 0; - phantom_stream->src.height = phantom_vactive; - - phantom_stream->timing.v_addressable = phantom_vactive; - phantom_stream->timing.v_front_porch = 1; - phantom_stream->timing.v_total = phantom_stream->timing.v_addressable + - phantom_stream->timing.v_front_porch + - phantom_stream->timing.v_sync_width + - phantom_bp; -} - -static struct dc_stream_state *dml_enable_phantom_stream(struct dc *dc, - struct dc_state *context, - struct pipe_ctx *ref_pipe) -{ - struct dc_stream_state *phantom_stream = NULL; - - phantom_stream = dc_create_stream_for_sink(ref_pipe->stream->sink); - phantom_stream->signal = SIGNAL_TYPE_VIRTUAL; - phantom_stream->dpms_off = true; - phantom_stream->mall_stream_config.type = SUBVP_PHANTOM; - phantom_stream->mall_stream_config.paired_stream = ref_pipe->stream; - ref_pipe->stream->mall_stream_config.type = SUBVP_MAIN; - ref_pipe->stream->mall_stream_config.paired_stream = phantom_stream; - - /* stream has limited viewport and small timing */ - memcpy(&phantom_stream->timing, 
&ref_pipe->stream->timing, sizeof(phantom_stream->timing)); - memcpy(&phantom_stream->src, &ref_pipe->stream->src, sizeof(phantom_stream->src)); - memcpy(&phantom_stream->dst, &ref_pipe->stream->dst, sizeof(phantom_stream->dst)); - dml_set_phantom_stream_timing(dc, context, ref_pipe, phantom_stream); - - dc_add_stream_to_ctx(dc, context, phantom_stream); - dc->hwss.apply_ctx_to_hw(dc, context); - return phantom_stream; -} - -static void dml_enable_phantom_plane(struct dc *dc, - struct dc_state *context, - struct dc_stream_state *phantom_stream, - struct pipe_ctx *main_pipe) -{ - struct dc_plane_state *phantom_plane = NULL; - struct dc_plane_state *prev_phantom_plane = NULL; - struct pipe_ctx *curr_pipe = main_pipe; - - while (curr_pipe) { - if (curr_pipe->top_pipe && curr_pipe->top_pipe->plane_state == curr_pipe->plane_state) - phantom_plane = prev_phantom_plane; - else - phantom_plane = dc_create_plane_state(dc); - - memcpy(&phantom_plane->address, &curr_pipe->plane_state->address, sizeof(phantom_plane->address)); - memcpy(&phantom_plane->scaling_quality, &curr_pipe->plane_state->scaling_quality, - sizeof(phantom_plane->scaling_quality)); - memcpy(&phantom_plane->src_rect, &curr_pipe->plane_state->src_rect, sizeof(phantom_plane->src_rect)); - memcpy(&phantom_plane->dst_rect, &curr_pipe->plane_state->dst_rect, sizeof(phantom_plane->dst_rect)); - memcpy(&phantom_plane->clip_rect, &curr_pipe->plane_state->clip_rect, sizeof(phantom_plane->clip_rect)); - memcpy(&phantom_plane->plane_size, &curr_pipe->plane_state->plane_size, - sizeof(phantom_plane->plane_size)); - memcpy(&phantom_plane->tiling_info, &curr_pipe->plane_state->tiling_info, - sizeof(phantom_plane->tiling_info)); - memcpy(&phantom_plane->dcc, &curr_pipe->plane_state->dcc, sizeof(phantom_plane->dcc)); - /* Currently compat_level is undefined in dc_state - * phantom_plane->compat_level = curr_pipe->plane_state->compat_level; - */ - phantom_plane->format = curr_pipe->plane_state->format; - phantom_plane->rotation = curr_pipe->plane_state->rotation; - phantom_plane->visible = curr_pipe->plane_state->visible; - - /* Shadow pipe has small viewport. 
*/ - phantom_plane->clip_rect.y = 0; - phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable; - - dc_add_plane_to_context(dc, phantom_stream, phantom_plane, context); - - curr_pipe = curr_pipe->bottom_pipe; - prev_phantom_plane = phantom_plane; - } -} - -static void dml_add_phantom_pipes(struct dc *dc, struct dc_state *context) -{ - int i = 0; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct dc_stream_state *ref_stream = pipe->stream; - // Only construct phantom stream for top pipes that have plane enabled - if (!pipe->top_pipe && pipe->plane_state && pipe->stream && - pipe->stream->mall_stream_config.type == SUBVP_NONE) { - struct dc_stream_state *phantom_stream = NULL; - - phantom_stream = dml_enable_phantom_stream(dc, context, pipe); - dml_enable_phantom_plane(dc, context, phantom_stream, pipe); - } - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state && pipe->stream && - pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - pipe->stream->use_dynamic_meta = false; - pipe->plane_state->flip_immediate = false; - if (!resource_build_scaling_params(pipe)) { - // Log / remove phantom pipes since failed to build scaling params - } - } - } -} - -static void dml_remove_phantom_pipes(struct dc *dc, struct dc_state *context) -{ - int i; - bool removed_pipe = false; - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - // build scaling params for phantom pipes - if (pipe->plane_state && pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) { - dc_rem_all_planes_for_stream(dc, pipe->stream, context); - dc_remove_stream_from_ctx(dc, context, pipe->stream); - removed_pipe = true; - } - - // Clear all phantom stream info - if (pipe->stream) { - pipe->stream->mall_stream_config.type = SUBVP_NONE; - pipe->stream->mall_stream_config.paired_stream = NULL; - } - } - if (removed_pipe) - dc->hwss.apply_ctx_to_hw(dc, context); -} - -/* - * If the input state contains no upstream planes for a particular pipe (i.e. 
only timing) - * we need to populate some "conservative" plane information as DML cannot handle "no planes" - */ -static void populate_default_plane_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_pipe_params_st *pipe) -{ - pipe->src.is_hsplit = pipe->dest.odm_combine != dm_odm_combine_mode_disabled; - pipe->src.source_scan = dm_horz; - pipe->src.sw_mode = dm_sw_4kb_s; - pipe->src.macro_tile_size = dm_64k_tile; - pipe->src.viewport_width = timing->h_addressable; - if (pipe->src.viewport_width > 1920) - pipe->src.viewport_width = 1920; - pipe->src.viewport_height = timing->v_addressable; - if (pipe->src.viewport_height > 1080) - pipe->src.viewport_height = 1080; - pipe->src.surface_height_y = pipe->src.viewport_height; - pipe->src.surface_width_y = pipe->src.viewport_width; - pipe->src.surface_height_c = pipe->src.viewport_height; - pipe->src.surface_width_c = pipe->src.viewport_width; - pipe->src.data_pitch = ((pipe->src.viewport_width + 255) / 256) * 256; - pipe->src.source_format = dm_444_32; - pipe->dest.recout_width = pipe->src.viewport_width; - pipe->dest.recout_height = pipe->src.viewport_height; - pipe->dest.full_recout_width = pipe->dest.recout_width; - pipe->dest.full_recout_height = pipe->dest.recout_height; - pipe->scale_ratio_depth.lb_depth = dm_lb_16; - pipe->scale_ratio_depth.hscl_ratio = 1.0; - pipe->scale_ratio_depth.vscl_ratio = 1.0; - pipe->scale_ratio_depth.scl_enable = 0; - pipe->scale_taps.htaps = 1; - pipe->scale_taps.vtaps = 1; - pipe->dest.vtotal_min = timing->v_total; - pipe->dest.vtotal_max = timing->v_total; - - if (pipe->dest.odm_combine == dm_odm_combine_mode_2to1) { - pipe->src.viewport_width /= 2; - pipe->dest.recout_width /= 2; - } else if (pipe->dest.odm_combine == dm_odm_combine_mode_4to1) { - pipe->src.viewport_width /= 4; - pipe->dest.recout_width /= 4; - } - - pipe->src.dcc = false; - pipe->src.dcc_rate = 1; -} - -/* - * If the pipe is not blending (i.e. 
pipe_ctx->top pipe == null) then its - * hsplit group is equal to its own pipe ID - * Otherwise, all pipes part of the same blending tree have the same hsplit group - * ID as the top most pipe - * - * If the pipe ctx is ODM combined, then similar logic follows - */ -static void populate_hsplit_group_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe) -{ - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; - - if (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state - == dc_pipe_ctx->plane_state) { - struct pipe_ctx *first_pipe = dc_pipe_ctx->top_pipe; - int split_idx = 0; - - while (first_pipe->top_pipe && first_pipe->top_pipe->plane_state - == dc_pipe_ctx->plane_state) { - first_pipe = first_pipe->top_pipe; - split_idx++; - } - - /* Treat 4to1 mpc combine as an mpo of 2 2-to-1 combines */ - if (split_idx == 0) - e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx; - else if (split_idx == 1) - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; - else if (split_idx == 2) - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->top_pipe->pipe_idx; - - } else if (dc_pipe_ctx->prev_odm_pipe) { - struct pipe_ctx *first_pipe = dc_pipe_ctx->prev_odm_pipe; - - while (first_pipe->prev_odm_pipe) - first_pipe = first_pipe->prev_odm_pipe; - e2e_pipe->pipe.src.hsplit_grp = first_pipe->pipe_idx; - } -} - -static void populate_dml_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe, int always_scale) -{ - const struct dc_plane_state *pln = dc_pipe_ctx->plane_state; - const struct scaler_data *scl = &dc_pipe_ctx->plane_res.scl_data; - - e2e_pipe->pipe.src.immediate_flip = pln->flip_immediate; - e2e_pipe->pipe.src.is_hsplit = (dc_pipe_ctx->bottom_pipe && dc_pipe_ctx->bottom_pipe->plane_state == pln) - || (dc_pipe_ctx->top_pipe && dc_pipe_ctx->top_pipe->plane_state == pln) - || e2e_pipe->pipe.dest.odm_combine != dm_odm_combine_mode_disabled; - - /* stereo is not split */ - if (pln->stereo_format == PLANE_STEREO_FORMAT_SIDE_BY_SIDE || - pln->stereo_format == PLANE_STEREO_FORMAT_TOP_AND_BOTTOM) { - e2e_pipe->pipe.src.is_hsplit = false; - e2e_pipe->pipe.src.hsplit_grp = dc_pipe_ctx->pipe_idx; - } - - e2e_pipe->pipe.src.source_scan = pln->rotation == ROTATION_ANGLE_90 - || pln->rotation == ROTATION_ANGLE_270 ? 
dm_vert : dm_horz; - e2e_pipe->pipe.src.viewport_y_y = scl->viewport.y; - e2e_pipe->pipe.src.viewport_y_c = scl->viewport_c.y; - e2e_pipe->pipe.src.viewport_width = scl->viewport.width; - e2e_pipe->pipe.src.viewport_width_c = scl->viewport_c.width; - e2e_pipe->pipe.src.viewport_height = scl->viewport.height; - e2e_pipe->pipe.src.viewport_height_c = scl->viewport_c.height; - e2e_pipe->pipe.src.viewport_width_max = pln->src_rect.width; - e2e_pipe->pipe.src.viewport_height_max = pln->src_rect.height; - e2e_pipe->pipe.src.surface_width_y = pln->plane_size.surface_size.width; - e2e_pipe->pipe.src.surface_height_y = pln->plane_size.surface_size.height; - e2e_pipe->pipe.src.surface_width_c = pln->plane_size.chroma_size.width; - e2e_pipe->pipe.src.surface_height_c = pln->plane_size.chroma_size.height; - - if (pln->format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA - || pln->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { - e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch; - e2e_pipe->pipe.src.data_pitch_c = pln->plane_size.chroma_pitch; - e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch; - e2e_pipe->pipe.src.meta_pitch_c = pln->dcc.meta_pitch_c; - } else { - e2e_pipe->pipe.src.data_pitch = pln->plane_size.surface_pitch; - e2e_pipe->pipe.src.meta_pitch = pln->dcc.meta_pitch; - } - e2e_pipe->pipe.src.dcc = pln->dcc.enable; - e2e_pipe->pipe.src.dcc_rate = 1; - e2e_pipe->pipe.dest.recout_width = scl->recout.width; - e2e_pipe->pipe.dest.recout_height = scl->recout.height; - e2e_pipe->pipe.dest.full_recout_height = scl->recout.height; - e2e_pipe->pipe.dest.full_recout_width = scl->recout.width; - if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_2to1) - e2e_pipe->pipe.dest.full_recout_width *= 2; - else if (e2e_pipe->pipe.dest.odm_combine == dm_odm_combine_mode_4to1) - e2e_pipe->pipe.dest.full_recout_width *= 4; - else { - struct pipe_ctx *split_pipe = dc_pipe_ctx->bottom_pipe; - - while (split_pipe && split_pipe->plane_state == pln) { - e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; - split_pipe = split_pipe->bottom_pipe; - } - split_pipe = dc_pipe_ctx->top_pipe; - while (split_pipe && split_pipe->plane_state == pln) { - e2e_pipe->pipe.dest.full_recout_width += split_pipe->plane_res.scl_data.recout.width; - split_pipe = split_pipe->top_pipe; - } - } - - e2e_pipe->pipe.scale_ratio_depth.lb_depth = dm_lb_16; - e2e_pipe->pipe.scale_ratio_depth.hscl_ratio = (double) scl->ratios.horz.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.hscl_ratio_c = (double) scl->ratios.horz_c.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.vscl_ratio = (double) scl->ratios.vert.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.vscl_ratio_c = (double) scl->ratios.vert_c.value / (1ULL<<32); - e2e_pipe->pipe.scale_ratio_depth.scl_enable = - scl->ratios.vert.value != dc_fixpt_one.value - || scl->ratios.horz.value != dc_fixpt_one.value - || scl->ratios.vert_c.value != dc_fixpt_one.value - || scl->ratios.horz_c.value != dc_fixpt_one.value /*Lb only or Full scl*/ - || always_scale; /*support always scale*/ - e2e_pipe->pipe.scale_taps.htaps = scl->taps.h_taps; - e2e_pipe->pipe.scale_taps.htaps_c = scl->taps.h_taps_c; - e2e_pipe->pipe.scale_taps.vtaps = scl->taps.v_taps; - e2e_pipe->pipe.scale_taps.vtaps_c = scl->taps.v_taps_c; - - /* Currently compat_level is not defined. 
Commenting it until further resolution - * if (pln->compat_level == DC_LEGACY_TILING_ADDR_GEN_TWO) { - swizzle_to_dml_params(pln->tiling_info.gfx9.swizzle, - &e2e_pipe->pipe.src.sw_mode); - e2e_pipe->pipe.src.macro_tile_size = - swizzle_mode_to_macro_tile_size(pln->tiling_info.gfx9.swizzle); - } else { - gfx10array_mode_to_dml_params(pln->tiling_info.gfx10compatible.array_mode, - pln->compat_level, - &e2e_pipe->pipe.src.sw_mode); - e2e_pipe->pipe.src.macro_tile_size = dm_4k_tile; - }*/ - - e2e_pipe->pipe.src.source_format = dc_source_format_to_dml_source_format(pln->format); -} - -static void populate_dml_cursor_parameters_from_dc_pipe_ctx (const struct pipe_ctx *dc_pipe_ctx, struct _vcs_dpi_display_e2e_pipe_params_st *e2e_pipe) -{ - /* - * For graphic plane, cursor number is 1, nv12 is 0 - * bw calculations due to cursor on/off - */ - if (dc_pipe_ctx->plane_state && - (dc_pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE || - dc_pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM)) - e2e_pipe->pipe.src.num_cursors = 0; - else - e2e_pipe->pipe.src.num_cursors = 1; - - e2e_pipe->pipe.src.cur0_src_width = 256; - e2e_pipe->pipe.src.cur0_bpp = dm_cur_32bit; -} - -static int populate_dml_pipes_from_context_base( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int pipe_cnt, i; - bool synchronized_vblank = true; - struct resource_context *res_ctx = &context->res_ctx; - - for (i = 0, pipe_cnt = -1; i < dc->res_pool->pipe_count; i++) { - if (!res_ctx->pipe_ctx[i].stream) - continue; - - if (pipe_cnt < 0) { - pipe_cnt = i; - continue; - } - - if (res_ctx->pipe_ctx[pipe_cnt].stream == res_ctx->pipe_ctx[i].stream) - continue; - - if (dc->debug.disable_timing_sync || - (!resource_are_streams_timing_synchronizable( - res_ctx->pipe_ctx[pipe_cnt].stream, - res_ctx->pipe_ctx[i].stream) && - !resource_are_vblanks_synchronizable( - res_ctx->pipe_ctx[pipe_cnt].stream, - res_ctx->pipe_ctx[i].stream))) { - synchronized_vblank = false; - break; - } - } - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing = &res_ctx->pipe_ctx[i].stream->timing; - - struct audio_check aud_check = {0}; - if (!res_ctx->pipe_ctx[i].stream) - continue; - - /* todo: - pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = 0; - pipes[pipe_cnt].pipe.src.dcc = 0; - pipes[pipe_cnt].pipe.src.vm = 0;*/ - - pipes[pipe_cnt].clks_cfg.refclk_mhz = dc->res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000.0; - - pipes[pipe_cnt].dout.dsc_enable = res_ctx->pipe_ctx[i].stream->timing.flags.DSC; - /* todo: rotation?*/ - pipes[pipe_cnt].dout.dsc_slices = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.num_slices_h; - if (res_ctx->pipe_ctx[i].stream->use_dynamic_meta) { - pipes[pipe_cnt].pipe.src.dynamic_metadata_enable = true; - /* 1/2 vblank */ - pipes[pipe_cnt].pipe.src.dynamic_metadata_lines_before_active = - (timing->v_total - timing->v_addressable - - timing->v_border_top - timing->v_border_bottom) / 2; - /* 36 bytes dp, 32 hdmi */ - pipes[pipe_cnt].pipe.src.dynamic_metadata_xmit_bytes = - dc_is_dp_signal(res_ctx->pipe_ctx[i].stream->signal) ? 
36 : 32; - } - pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank; - - dc_timing_to_dml_timing(timing, &pipes[pipe_cnt].pipe.dest); - pipes[pipe_cnt].pipe.dest.vtotal_min = res_ctx->pipe_ctx[i].stream->adjust.v_total_min; - pipes[pipe_cnt].pipe.dest.vtotal_max = res_ctx->pipe_ctx[i].stream->adjust.v_total_max; - - pipes[pipe_cnt].pipe.dest.otg_inst = res_ctx->pipe_ctx[i].stream_res.tg->inst; - - pipes[pipe_cnt].pipe.dest.odm_combine = get_dml_odm_combine(&res_ctx->pipe_ctx[i]); - - populate_hsplit_group_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]); - - pipes[pipe_cnt].dout.dp_lanes = 4; - pipes[pipe_cnt].dout.is_virtual = 0; - pipes[pipe_cnt].dout.output_type = get_dml_output_type(res_ctx->pipe_ctx[i].stream->signal); - if (pipes[pipe_cnt].dout.output_type < 0) { - pipes[pipe_cnt].dout.output_type = dm_dp; - pipes[pipe_cnt].dout.is_virtual = 1; - } - - populate_color_depth_and_encoding_from_timing(&res_ctx->pipe_ctx[i].stream->timing, &pipes[pipe_cnt].dout); - - if (res_ctx->pipe_ctx[i].stream->timing.flags.DSC) - pipes[pipe_cnt].dout.output_bpp = res_ctx->pipe_ctx[i].stream->timing.dsc_cfg.bits_per_pixel / 16.0; - - /* todo: default max for now, until there is logic reflecting this in dc*/ - pipes[pipe_cnt].dout.dsc_input_bpc = 12; - /*fill up the audio sample rate (unit in kHz)*/ - get_audio_check(&res_ctx->pipe_ctx[i].stream->audio_info, &aud_check); - pipes[pipe_cnt].dout.max_audio_sample_rate = aud_check.max_audiosample_rate / 1000; - - populate_dml_cursor_parameters_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt]); - - if (!res_ctx->pipe_ctx[i].plane_state) { - populate_default_plane_from_timing(timing, &pipes[pipe_cnt].pipe); - } else { - populate_dml_from_dc_pipe_ctx(&res_ctx->pipe_ctx[i], &pipes[pipe_cnt], dc->debug.always_scale); - } - - pipe_cnt++; - } - - /* populate writeback information */ - if (dc->res_pool) - dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes); - - return pipe_cnt; -} - -static int dml_populate_dml_pipes_from_context( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - bool fast_validate) -{ - int i, pipe_cnt; - struct resource_context *res_ctx = &context->res_ctx; - struct pipe_ctx *pipe = NULL; // Fix potentially uninitialized error from VS - - populate_dml_pipes_from_context_base(dc, context, pipes, fast_validate); - - for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) { - struct dc_crtc_timing *timing; - - if (!res_ctx->pipe_ctx[i].stream) - continue; - pipe = &res_ctx->pipe_ctx[i]; - timing = &pipe->stream->timing; - - pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; - pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; - - pipes[pipe_cnt].dout.dsc_input_bpc = 0; - if (pipes[pipe_cnt].dout.dsc_enable) { - switch (timing->display_color_depth) { - case COLOR_DEPTH_888: - pipes[pipe_cnt].dout.dsc_input_bpc = 8; - break; - case COLOR_DEPTH_101010: - pipes[pipe_cnt].dout.dsc_input_bpc = 10; - break; - case COLOR_DEPTH_121212: - pipes[pipe_cnt].dout.dsc_input_bpc = 12; - break; - default: - ASSERT(0); - break; - } - } - pipe_cnt++; - } - dc->config.enable_4to1MPC = false; - if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) { - if (is_dual_plane(pipe->plane_state->format) - && pipe->plane_state->src_rect.width <= 1920 && pipe->plane_state->src_rect.height <= 1080) { - dc->config.enable_4to1MPC = true; - } 
else if (!is_dual_plane(pipe->plane_state->format)) { - context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192; - pipes[0].pipe.src.unbounded_req_mode = true; - } - } - - return pipe_cnt; -} - -static void dml_full_validate_bw_helper(struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *vlevel, - int *split, - bool *merge, - int *pipe_cnt) -{ - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - /* - * DML favors voltage over p-state, but we're more interested in - * supporting p-state over voltage. We can't support p-state in - * prefetch mode > 0 so try capping the prefetch mode to start. - */ - context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = - dm_allow_self_refresh_and_mclk_switch; - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - /* This may adjust vlevel and maxMpcComb */ - if (*vlevel < context->bw_ctx.dml.soc.num_states) - *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - - /* Conditions for setting up phantom pipes for SubVP: - * 1. Not force disable SubVP - * 2. Full update (i.e. !fast_validate) - * 3. Enough pipes are available to support SubVP (TODO: Which pipes will use VACTIVE / VBLANK / SUBVP?) - * 4. Display configuration passes validation - * 5. (Config doesn't support MCLK in VACTIVE/VBLANK || dc->debug.force_subvp_mclk_switch) - */ - if (!dc->debug.force_disable_subvp && - dml_enough_pipes_for_subvp(dc, context) && - *vlevel < context->bw_ctx.dml.soc.num_states && - (vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported || - dc->debug.force_subvp_mclk_switch)) { - - dml_add_phantom_pipes(dc, context); - - /* Create input to DML based on new context which includes phantom pipes - * TODO: Input to DML should mark which pipes are phantom - */ - *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false); - *vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, *pipe_cnt); - if (*vlevel < context->bw_ctx.dml.soc.num_states) { - memset(split, 0, MAX_PIPES * sizeof(*split)); - memset(merge, 0, MAX_PIPES * sizeof(*merge)); - *vlevel = dml_validate_apply_pipe_split_flags(dc, context, *vlevel, split, merge); - } - - // If SubVP pipe config is unsupported (or cannot be used for UCLK switching) - // remove phantom pipes and repopulate dml pipes - if (*vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - dml_remove_phantom_pipes(dc, context); - *pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, false); - } - } -} - -static void dcn20_adjust_adaptive_sync_v_startup( - const struct dc_crtc_timing *dc_crtc_timing, int *vstartup_start) -{ - struct dc_crtc_timing patched_crtc_timing; - uint32_t asic_blank_end = 0; - uint32_t asic_blank_start = 0; - uint32_t newVstartup = 0; - - patched_crtc_timing = *dc_crtc_timing; - - if (patched_crtc_timing.flags.INTERLACE == 1) { - if (patched_crtc_timing.v_front_porch < 2) - patched_crtc_timing.v_front_porch = 2; - } else { - if (patched_crtc_timing.v_front_porch < 1) - patched_crtc_timing.v_front_porch = 1; - } - - /* blank_start = frame end - front porch */ - asic_blank_start = patched_crtc_timing.v_total - - patched_crtc_timing.v_front_porch; - - /* blank_end = blank_start - active */ - asic_blank_end = asic_blank_start - - patched_crtc_timing.v_border_bottom - - patched_crtc_timing.v_addressable - - patched_crtc_timing.v_border_top; - - newVstartup = 
asic_blank_end + (patched_crtc_timing.v_total - asic_blank_start); - - *vstartup_start = ((newVstartup > *vstartup_start) ? newVstartup : *vstartup_start); -} - -static bool is_dp_128b_132b_signal(struct pipe_ctx *pipe_ctx) -{ - return (pipe_ctx->stream_res.hpo_dp_stream_enc && - pipe_ctx->link_res.hpo_dp_link_enc && - dc_is_dp_signal(pipe_ctx->stream->signal)); -} - -static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) -{ - int i; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) - return true; - } - return false; -} - -static void dml_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) -{ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us; - context->bw_ctx.dml.soc.fclk_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; - } -} - -static bool dml_internal_validate( - struct dc *dc, - struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int *pipe_cnt_out, - int *vlevel_out, - bool fast_validate) -{ - bool out = false; - bool repopulate_pipes = false; - int split[MAX_PIPES] = { 0 }; - bool merge[MAX_PIPES] = { false }; - bool newly_split[MAX_PIPES] = { false }; - int pipe_cnt, i, pipe_idx, vlevel; - struct vba_vars_st *vba = &context->bw_ctx.dml.vba; - - ASSERT(pipes); - if (!pipes) - return false; - - // For each full update, remove all existing phantom pipes first - dml_remove_phantom_pipes(dc, context); - - dml_update_soc_for_wm_a(dc, context); - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state) { - // On initial pass through DML, we intend to use MALL for SS on all - // (non-PSR) surfaces with none using MALL for P-State - // 'mall_plane_config': is not a member of 'dc_plane_state' - commenting it out till mall_plane_config gets supported in dc_plant_state - //if (pipe->stream && pipe->stream->link->psr_settings.psr_version == DC_PSR_VERSION_UNSUPPORTED) - // pipe->plane_state->mall_plane_config.use_mall_for_ss = true; - } - } - pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - - if (!pipe_cnt) { - out = true; - goto validate_out; - } - - dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt); - - if (!fast_validate) { - dml_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt); - } - - if (fast_validate || vlevel == context->bw_ctx.dml.soc.num_states || - vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported) { - /* - * If mode is unsupported or there's still no p-state support then - * fall back to favoring voltage. - * - * We don't actually support prefetch mode 2, so require that we - * at least support prefetch mode 1. 
- */ - context->bw_ctx.dml.soc.allow_dram_self_refresh_or_dram_clock_change_in_vblank = - dm_allow_self_refresh; - - vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt); - if (vlevel < context->bw_ctx.dml.soc.num_states) { - memset(split, 0, sizeof(split)); - memset(merge, 0, sizeof(merge)); - vlevel = dml_validate_apply_pipe_split_flags(dc, context, vlevel, split, merge); - } - } - - dml_log_mode_support_params(&context->bw_ctx.dml); - - if (vlevel == context->bw_ctx.dml.soc.num_states) - goto validate_fail; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *mpo_pipe = pipe->bottom_pipe; - - if (!pipe->stream) - continue; - - /* We only support full screen mpo with ODM */ - if (vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled - && pipe->plane_state && mpo_pipe - && memcmp(&mpo_pipe->plane_res.scl_data.recout, - &pipe->plane_res.scl_data.recout, - sizeof(struct rect)) != 0) { - ASSERT(mpo_pipe->plane_state != pipe->plane_state); - goto validate_fail; - } - pipe_idx++; - } - - /* merge pipes if necessary */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - /*skip pipes that don't need merging*/ - if (!merge[i]) - continue; - - /* if ODM merge we ignore mpc tree, mpo pipes will have their own flags */ - if (pipe->prev_odm_pipe) { - /*split off odm pipe*/ - pipe->prev_odm_pipe->next_odm_pipe = pipe->next_odm_pipe; - if (pipe->next_odm_pipe) - pipe->next_odm_pipe->prev_odm_pipe = pipe->prev_odm_pipe; - - pipe->bottom_pipe = NULL; - pipe->next_odm_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - pipe->top_pipe = NULL; - pipe->prev_odm_pipe = NULL; - if (pipe->stream_res.dsc) - dml_release_dsc(&context->res_ctx, dc->res_pool, &pipe->stream_res.dsc); - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else if (pipe->top_pipe && pipe->top_pipe->plane_state == pipe->plane_state) { - struct pipe_ctx *top_pipe = pipe->top_pipe; - struct pipe_ctx *bottom_pipe = pipe->bottom_pipe; - - top_pipe->bottom_pipe = bottom_pipe; - if (bottom_pipe) - bottom_pipe->top_pipe = top_pipe; - - pipe->top_pipe = NULL; - pipe->bottom_pipe = NULL; - pipe->plane_state = NULL; - pipe->stream = NULL; - memset(&pipe->plane_res, 0, sizeof(pipe->plane_res)); - memset(&pipe->stream_res, 0, sizeof(pipe->stream_res)); - repopulate_pipes = true; - } else - ASSERT(0); /* Should never try to merge master pipe */ - - } - - for (i = 0, pipe_idx = -1; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - struct pipe_ctx *hsplit_pipe = NULL; - bool odm; - int old_index = -1; - - if (!pipe->stream || newly_split[i]) - continue; - - pipe_idx++; - odm = vba->ODMCombineEnabled[vba->pipe_plane[pipe_idx]] != dm_odm_combine_mode_disabled; - - if (!pipe->plane_state && !odm) - continue; - - if (split[i]) { - if (odm) { - if (split[i] == 4 && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - } else { - if (split[i] == 4 && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - 
old_index = old_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else if (old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - } - hsplit_pipe = dml_find_split_pipe(dc, context, old_index); - ASSERT(hsplit_pipe); - if (!hsplit_pipe) - goto validate_fail; - - if (!dml_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, hsplit_pipe, odm)) - goto validate_fail; - - newly_split[hsplit_pipe->pipe_idx] = true; - repopulate_pipes = true; - } - if (split[i] == 4) { - struct pipe_ctx *pipe_4to1; - - if (odm && old_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && - old_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dml_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dml_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - - if (odm && old_pipe->next_odm_pipe && old_pipe->next_odm_pipe->next_odm_pipe - && old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe) - old_index = old_pipe->next_odm_pipe->next_odm_pipe->next_odm_pipe->pipe_idx; - else if (!odm && old_pipe->bottom_pipe && old_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe && - old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->plane_state == old_pipe->plane_state) - old_index = old_pipe->bottom_pipe->bottom_pipe->bottom_pipe->pipe_idx; - else - old_index = -1; - pipe_4to1 = dml_find_split_pipe(dc, context, old_index); - ASSERT(pipe_4to1); - if (!pipe_4to1) - goto validate_fail; - if (!dml_split_stream_for_mpc_or_odm( - dc, &context->res_ctx, - hsplit_pipe, pipe_4to1, odm)) - goto validate_fail; - newly_split[pipe_4to1->pipe_idx] = true; - } - if (odm) - dml_build_mapped_resource(dc, context, pipe->stream); - } - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i]; - - if (pipe->plane_state) { - if (!resource_build_scaling_params(pipe)) - goto validate_fail; - } - } - - /* Actual dsc count per stream dsc validation*/ - if (!dml_validate_dsc(dc, context)) { - vba->ValidationStatus[vba->soc.num_states] = DML_FAIL_DSC_VALIDATION_FAILURE; - goto validate_fail; - } - - if (repopulate_pipes) - pipe_cnt = dml_populate_dml_pipes_from_context(dc, context, pipes, fast_validate); - *vlevel_out = vlevel; - *pipe_cnt_out = pipe_cnt; - - out = true; - goto validate_out; - -validate_fail: - out = false; - -validate_out: - return out; -} - -static void dml_calculate_dlg_params( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx; - int plane_count; - - /* Writeback MCIF_WB arbitration parameters */ - if (dc->res_pool) - dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); - - context->bw_ctx.bw.dcn.clk.dispclk_khz = context->bw_ctx.dml.vba.DISPCLK * 1000; - context->bw_ctx.bw.dcn.clk.dcfclk_khz = context->bw_ctx.dml.vba.DCFCLK * 1000; - context->bw_ctx.bw.dcn.clk.socclk_khz = context->bw_ctx.dml.vba.SOCCLK * 1000; - context->bw_ctx.bw.dcn.clk.dramclk_khz = context->bw_ctx.dml.vba.DRAMSpeed * 1000 / 16; - context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = context->bw_ctx.dml.vba.DCFCLKDeepSleep * 1000; - context->bw_ctx.bw.dcn.clk.fclk_khz = context->bw_ctx.dml.vba.FabricClock * 
1000; - context->bw_ctx.bw.dcn.clk.p_state_change_support = - context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] - != dm_dram_clock_change_unsupported; - - context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - /* 'z9_support': is not a member of 'dc_clocks' - Commenting out till we have this support in dc_clocks - * context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ? - DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW; - */ - plane_count = 0; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].plane_state) - plane_count++; - } - - /* Commented out as per above error for now. - if (plane_count == 0) - context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW; - */ - context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); - context->bw_ctx.bw.dcn.clk.fclk_p_state_change_support = - context->bw_ctx.dml.vba.FCLKChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) - context->bw_ctx.bw.dcn.clk.dispclk_khz = dc->debug.min_disp_clk_khz; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - pipes[pipe_idx].pipe.dest.vupdate_width = get_vupdate_width(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - pipes[pipe_idx].pipe.dest.vready_offset = get_vready_offset(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - if (context->res_ctx.pipe_ctx[i].stream->mall_stream_config.type == SUBVP_PHANTOM) { - // Phantom pipe requires that DET_SIZE = 0 and no unbounded requests - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = 0; - context->res_ctx.pipe_ctx[i].unbounded_req = false; - } else { - context->res_ctx.pipe_ctx[i].det_buffer_size_kb = context->bw_ctx.dml.ip.det_buffer_size_kbytes; - context->res_ctx.pipe_ctx[i].unbounded_req = pipes[pipe_idx].pipe.src.unbounded_req_mode; - } - - if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = - pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000; - context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest; - pipe_idx++; - } - /*save a original dppclock copy*/ - context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz; - context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz; - context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dppclk_mhz * 1000; - context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz = context->bw_ctx.dml.soc.clock_limits[vlevel].dispclk_mhz * 1000; - context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - - context->bw_ctx.dml.ip.det_buffer_size_kbytes * pipe_idx; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - bool cstate_en = context->bw_ctx.dml.vba.PrefetchMode[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != 2; - - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - context->bw_ctx.dml.funcs.rq_dlg_get_dlg_reg(&context->bw_ctx.dml, - &context->res_ctx.pipe_ctx[i].dlg_regs, - 
&context->res_ctx.pipe_ctx[i].ttu_regs, - pipes, - pipe_cnt, - pipe_idx, - cstate_en, - context->bw_ctx.bw.dcn.clk.p_state_change_support, - false, false, true); - - context->bw_ctx.dml.funcs.rq_dlg_get_rq_reg(&context->bw_ctx.dml, - &context->res_ctx.pipe_ctx[i].rq_regs, - &pipes[pipe_idx].pipe); - pipe_idx++; - } -} - -static void dml_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx, vlevel_temp = 0; - - double dcfclk = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; - double dcfclk_from_validation = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed; - bool pstate_en = context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] != - dm_dram_clock_change_unsupported; - - /* Set B: - * For Set B calculations use clocks from clock_limits[2] when available i.e. when SMU is present, - * otherwise use arbitrary low value from spreadsheet for DCFCLK as lower is safer for watermark - * calculations to cover bootup clocks. - * DCFCLK: soc.clock_limits[2] when available - * UCLK: soc.clock_limits[2] when available - */ - if (context->bw_ctx.dml.soc.num_states > 2) { - vlevel_temp = 2; - dcfclk = context->bw_ctx.dml.soc.clock_limits[2].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - //context->bw_ctx.bw.dcn.watermarks.b.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ - 
//context->bw_ctx.bw.dcn.watermarks.b.usr_retraining = context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns / 8; - - /* Set D: - * All clocks min. - * DCFCLK: Min, as reported by PM FW when available - * UCLK : Min, as reported by PM FW when available - * sr_enter_exit/sr_exit should be lower than used for DRAM (TBD after bringup or later, use as decided in Clk Mgr) - */ - - if (context->bw_ctx.dml.soc.num_states > 2) { - vlevel_temp = 0; - dcfclk = dc->clk_mgr->bw_params->clk_table.entries[0].dcfclk_mhz; - } else - dcfclk = 615; //DCFCLK Vmin_lv - - pipes[0].clks_cfg.voltage = vlevel_temp; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel_temp].socclk_mhz; - - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - //context->bw_ctx.bw.dcn.watermarks.d.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ - //context->bw_ctx.bw.dcn.watermarks.d.usr_retraining = context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns / 8; - /* Set C, for Dummy P-State: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW, when available - * UCLK : Min, as reported by PM FW, when available - * pstate latency as per UCLK state dummy pstate latency - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - unsigned int min_dram_speed_mts_margin = 160; - - if ((!pstate_en)) - min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz * 16; - - /* find largest table entry that is lower than dram speed, but lower than DPM0 still uses DPM0 */ - for (i = 3; i > 0; i--) - if (min_dram_speed_mts + min_dram_speed_mts_margin > dc->clk_mgr->bw_params->dummy_pstate_table[i].dram_speed_mts) - break; - - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.dummy_pstate_latency_us = dc->clk_mgr->bw_params->dummy_pstate_table[i].dummy_pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.fclk_pstate_change_ns = get_fclk_watermark(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - //context->bw_ctx.bw.dcn.watermarks.c.usr_retraining_ns = get_wm_usr_retraining(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Temporary, to have some fclk_pstate_change_ns and usr_retraining_ns wm values until DML is implemented */ - //context->bw_ctx.bw.dcn.watermarks.c.usr_retraining = context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns / 8; - if ((!pstate_en) && (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid)) { - /* The only difference between A and C is p-state latency, if p-state is not supported - * with full p-state latency we want to calculate DLG based on dummy p-state latency, - * Set A p-state watermark set to 0 previously, when p-state unsupported, for now keep as previous implementation. - */ - context->bw_ctx.bw.dcn.watermarks.a = context->bw_ctx.bw.dcn.watermarks.c; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = 0; - } else { - /* Set A: - * All clocks min. 
- * DCFCLK: Min, as reported by PM FW, when available - * UCLK: Min, as reported by PM FW, when available - */ - dml_update_soc_for_wm_a(dc, context); - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - } - - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk_from_validation; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); - pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - if (dc->config.forced_clocks) { - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; - } - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; - - pipe_idx++; - } - - context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; - - dml_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); - - if (!pstate_en) - /* Restore full p-state latency */ - context->bw_ctx.dml.soc.dram_clock_change_latency_us = - dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; -} - -bool dml_validate(struct dc *dc, - struct dc_state *context, - bool fast_validate) -{ - bool out = false; - - BW_VAL_TRACE_SETUP(); - - int vlevel = 0; - int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = context->bw_ctx.dml.dml_pipe_state; - DC_LOGGER_INIT(dc->ctx->logger); - - BW_VAL_TRACE_COUNT(); - - out = dml_internal_validate(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); - - if (pipe_cnt == 0) - goto validate_out; - - if (!out) - goto validate_fail; - - BW_VAL_TRACE_END_VOLTAGE_LEVEL(); - - if (fast_validate) { - BW_VAL_TRACE_SKIP(fast); - goto validate_out; - } - - dml_calculate_wm_and_dlg(dc, context, pipes, pipe_cnt, vlevel); - - BW_VAL_TRACE_END_WATERMARKS(); - - goto validate_out; - -validate_fail: - DC_LOG_WARNING("Mode Validation Warning: %s failed validation.\n", - 
dml_get_status_message(context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states])); - - BW_VAL_TRACE_SKIP(fail); - out = false; - -validate_out: - BW_VAL_TRACE_FINISH(); - - return out; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c deleted file mode 100644 index 4ec5310a2962d..0000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#ifdef DML_WRAPPER_TRANSLATION_ - -static void gfx10array_mode_to_dml_params( - enum array_mode_values array_mode, - enum legacy_tiling_compat_level compat_level, - unsigned int *sw_mode) -{ - switch (array_mode) { - case DC_ARRAY_LINEAR_ALLIGNED: - case DC_ARRAY_LINEAR_GENERAL: - *sw_mode = dm_sw_linear; - break; - case DC_ARRAY_2D_TILED_THIN1: -// DC_LEGACY_TILING_ADDR_GEN_ZERO - undefined as per current code hence removed -#if 0 - if (compat_level == DC_LEGACY_TILING_ADDR_GEN_ZERO) - *sw_mode = dm_sw_gfx7_2d_thin_l_vp; - else - *sw_mode = dm_sw_gfx7_2d_thin_gl; -#endif - break; - default: - ASSERT(0); /* Not supported */ - break; - } -} - -static void swizzle_to_dml_params( - enum swizzle_mode_values swizzle, - unsigned int *sw_mode) -{ - switch (swizzle) { - case DC_SW_LINEAR: - *sw_mode = dm_sw_linear; - break; - case DC_SW_4KB_S: - *sw_mode = dm_sw_4kb_s; - break; - case DC_SW_4KB_S_X: - *sw_mode = dm_sw_4kb_s_x; - break; - case DC_SW_4KB_D: - *sw_mode = dm_sw_4kb_d; - break; - case DC_SW_4KB_D_X: - *sw_mode = dm_sw_4kb_d_x; - break; - case DC_SW_64KB_S: - *sw_mode = dm_sw_64kb_s; - break; - case DC_SW_64KB_S_X: - *sw_mode = dm_sw_64kb_s_x; - break; - case DC_SW_64KB_S_T: - *sw_mode = dm_sw_64kb_s_t; - break; - case DC_SW_64KB_D: - *sw_mode = dm_sw_64kb_d; - break; - case DC_SW_64KB_D_X: - *sw_mode = dm_sw_64kb_d_x; - break; - case DC_SW_64KB_D_T: - *sw_mode = dm_sw_64kb_d_t; - break; - case DC_SW_64KB_R_X: - *sw_mode = dm_sw_64kb_r_x; - break; - case DC_SW_VAR_S: - *sw_mode = dm_sw_var_s; - break; - case DC_SW_VAR_S_X: - *sw_mode = dm_sw_var_s_x; - break; - case DC_SW_VAR_D: - *sw_mode = dm_sw_var_d; - break; - case DC_SW_VAR_D_X: - *sw_mode = dm_sw_var_d_x; - break; - - default: - ASSERT(0); /* Not supported */ - break; - } -} - -static void dc_timing_to_dml_timing(const struct dc_crtc_timing *timing, struct 
_vcs_dpi_display_pipe_dest_params_st *dest) -{ - dest->hblank_start = timing->h_total - timing->h_front_porch; - dest->hblank_end = dest->hblank_start - - timing->h_addressable - - timing->h_border_left - - timing->h_border_right; - dest->vblank_start = timing->v_total - timing->v_front_porch; - dest->vblank_end = dest->vblank_start - - timing->v_addressable - - timing->v_border_top - - timing->v_border_bottom; - dest->htotal = timing->h_total; - dest->vtotal = timing->v_total; - dest->hactive = timing->h_addressable; - dest->vactive = timing->v_addressable; - dest->interlaced = timing->flags.INTERLACE; - dest->pixel_rate_mhz = timing->pix_clk_100hz/10000.0; - if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) - dest->pixel_rate_mhz *= 2; -} - -static enum odm_combine_mode get_dml_odm_combine(const struct pipe_ctx *pipe) -{ - int odm_split_count = 0; - enum odm_combine_mode combine_mode = dm_odm_combine_mode_disabled; - struct pipe_ctx *next_pipe = pipe->next_odm_pipe; - - // Traverse pipe tree to determine odm split count - while (next_pipe) { - odm_split_count++; - next_pipe = next_pipe->next_odm_pipe; - } - pipe = pipe->prev_odm_pipe; - while (pipe) { - odm_split_count++; - pipe = pipe->prev_odm_pipe; - } - - // Translate split to DML odm combine factor - switch (odm_split_count) { - case 1: - combine_mode = dm_odm_combine_mode_2to1; - break; - case 3: - combine_mode = dm_odm_combine_mode_4to1; - break; - default: - combine_mode = dm_odm_combine_mode_disabled; - } - - return combine_mode; -} - -static int get_dml_output_type(enum signal_type dc_signal) -{ - int dml_output_type = -1; - - switch (dc_signal) { - case SIGNAL_TYPE_DISPLAY_PORT_MST: - case SIGNAL_TYPE_DISPLAY_PORT: - dml_output_type = dm_dp; - break; - case SIGNAL_TYPE_EDP: - dml_output_type = dm_edp; - break; - case SIGNAL_TYPE_HDMI_TYPE_A: - case SIGNAL_TYPE_DVI_SINGLE_LINK: - case SIGNAL_TYPE_DVI_DUAL_LINK: - dml_output_type = dm_hdmi; - break; - default: - break; - } - - return dml_output_type; -} - -static void populate_color_depth_and_encoding_from_timing(const struct dc_crtc_timing *timing, struct _vcs_dpi_display_output_params_st *dout) -{ - int output_bpc = 0; - - switch (timing->display_color_depth) { - case COLOR_DEPTH_666: - output_bpc = 6; - break; - case COLOR_DEPTH_888: - output_bpc = 8; - break; - case COLOR_DEPTH_101010: - output_bpc = 10; - break; - case COLOR_DEPTH_121212: - output_bpc = 12; - break; - case COLOR_DEPTH_141414: - output_bpc = 14; - break; - case COLOR_DEPTH_161616: - output_bpc = 16; - break; - case COLOR_DEPTH_999: - output_bpc = 9; - break; - case COLOR_DEPTH_111111: - output_bpc = 11; - break; - default: - output_bpc = 8; - break; - } - - switch (timing->pixel_encoding) { - case PIXEL_ENCODING_RGB: - case PIXEL_ENCODING_YCBCR444: - dout->output_format = dm_444; - dout->output_bpp = output_bpc * 3; - break; - case PIXEL_ENCODING_YCBCR420: - dout->output_format = dm_420; - dout->output_bpp = (output_bpc * 3.0) / 2; - break; - case PIXEL_ENCODING_YCBCR422: - if (timing->flags.DSC && !timing->dsc_cfg.ycbcr422_simple) - dout->output_format = dm_n422; - else - dout->output_format = dm_s422; - dout->output_bpp = output_bpc * 2; - break; - default: - dout->output_format = dm_444; - dout->output_bpp = output_bpc * 3; - } -} - -static enum source_format_class dc_source_format_to_dml_source_format(enum surface_pixel_format dc_format) -{ - enum source_format_class dml_format = dm_444_32; - - switch (dc_format) { - case SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr: - case 
SURFACE_PIXEL_FORMAT_VIDEO_420_YCrCb: - dml_format = dm_420_8; - break; - case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr: - case SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb: - dml_format = dm_420_10; - break; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: - case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: - case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: - dml_format = dm_444_64; - break; - case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555: - case SURFACE_PIXEL_FORMAT_GRPH_RGB565: - dml_format = dm_444_16; - break; - case SURFACE_PIXEL_FORMAT_GRPH_PALETA_256_COLORS: - dml_format = dm_444_8; - break; - case SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA: - dml_format = dm_rgbe_alpha; - break; - default: - dml_format = dm_444_32; - break; - } - - return dml_format; -} - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 7e43b4403b3e3..01d46f0c1ffe4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -312,8 +312,6 @@ struct timing_generator_funcs { int group_idx, uint32_t gsl_ready_signal); void (*set_out_mux)(struct timing_generator *tg, enum otg_out_mux_dest dest); - void (*set_vrr_m_const)(struct timing_generator *optc, - double vtotal_avg); void (*set_drr_trigger_window)(struct timing_generator *optc, uint32_t window_start, uint32_t window_end); void (*set_vtotal_change_limit)(struct timing_generator *optc, diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index e054f3494087f..9f3558c0ef110 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -247,10 +247,12 @@ enum { #define AMDGPU_FAMILY_GC_11_0_1 148 #define GC_11_0_0_A0 0x1 #define GC_11_0_2_A0 0x10 +#define GC_11_0_3_A0 0x20 #define GC_11_UNKNOWN 0xFF #define ASICREV_IS_GC_11_0_0(eChipRev) (eChipRev < GC_11_0_2_A0) -#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_UNKNOWN) +#define ASICREV_IS_GC_11_0_2(eChipRev) (eChipRev >= GC_11_0_2_A0 && eChipRev < GC_11_0_3_A0) +#define ASICREV_IS_GC_11_0_3(eChipRev) (eChipRev >= GC_11_0_3_A0 && eChipRev < GC_11_UNKNOWN) /* * ASIC chip ID diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h index 5d19a43189000..d89cf3e76c6dc 100644 --- a/drivers/gpu/drm/amd/display/include/logger_types.h +++ b/drivers/gpu/drm/amd/display/include/logger_types.h @@ -121,13 +121,15 @@ enum dc_log_type { LOG_DSC, #endif LOG_SMU_MSG, + LOG_DC2RESERVED4, + LOG_DC2RESERVED5, LOG_DWB, LOG_GAMMA_DEBUG, LOG_MAX_HW_POINTS, LOG_ALL_TF_CHANNELS, LOG_SAMPLE_1DLUT, LOG_DP2, - LOG_SECTION_TOTAL_COUNT + LOG_DC2RESERVED12, }; #define DC_MIN_LOG_MASK ((1 << LOG_ERROR) | \ diff --git a/drivers/gpu/drm/amd/dkms/config/config.h b/drivers/gpu/drm/amd/dkms/config/config.h index 58cffa4328dd7..28806304010c5 100644 --- a/drivers/gpu/drm/amd/dkms/config/config.h +++ b/drivers/gpu/drm/amd/dkms/config/config.h @@ -402,6 +402,9 @@ /* drm_dp_mst_{get,put}_port_malloc() is available */ #define HAVE_DRM_DP_MST_GET_PUT_PORT_MALLOC 1 +/* struct drm_dp_mst_port has passthrough_aux member */ +/* #undef HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX */ + /* struct drm_dp_mst_topology_cbs->destroy_connector is available */ /* #undef HAVE_DRM_DP_MST_TOPOLOGY_CBS_DESTROY_CONNECTOR */ diff --git a/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4 
b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4 new file mode 100644 index 0000000000000..a1f26ca53e149 --- /dev/null +++ b/drivers/gpu/drm/amd/dkms/m4/drm-dp-mst-port.m4 @@ -0,0 +1,23 @@ +dnl # +dnl # commit v5.18-2579-g3af4b1f1d6e7 +dnl # "drm/dp_mst: add passthrough_aux to struct drm_dp_mst_port" +dnl +AC_DEFUN([AC_AMDGPU_DRM_DP_MST_POST_PASSTHROUGH_AUX], [ + AC_KERNEL_DO_BACKGROUND([ + AC_KERNEL_TRY_COMPILE([ + #if defined(HAVE_DRM_DISPLAY_DRM_DP_MST_HELPER_H) + #include + #elif defined(HAVE_DRM_DP_DRM_DP_MST_HELPER_H) + #include + #else + #include + #endif + ], [ + struct drm_dp_mst_port *dp_mst_port = NULL; + dp_mst_port->passthrough_aux = NULL; + ], [ + AC_DEFINE(HAVE_DRM_DP_MST_PORT_PASSTHROUGH_AUX, 1, + [struct drm_dp_mst_port has passthrough_aux member]) + ]) + ]) +]) diff --git a/drivers/gpu/drm/amd/dkms/m4/kernel.m4 b/drivers/gpu/drm/amd/dkms/m4/kernel.m4 index 3bcd090500e66..42ee5087cd7b1 100644 --- a/drivers/gpu/drm/amd/dkms/m4/kernel.m4 +++ b/drivers/gpu/drm/amd/dkms/m4/kernel.m4 @@ -282,6 +282,7 @@ AC_DEFUN([AC_CONFIG_KERNEL], [ AC_AMDGPU_DRM_GEM_OBJECT_FUNCS_VMAP_HAS_IOSYS_MAP_ARG AC_AMDGPU_DRM_FB_HELPER_BUFFER AC_AMDGPU_DRM_DP_MST_TOPOLOGY_MGR_BASE + AC_AMDGPU_DRM_DP_MST_POST_PASSTHROUGH_AUX AC_KERNEL_WAIT AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 956b6ce81c846..1b300c569faf5 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, return ret; } +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = 0; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_set_residency_gfxoff(smu, value); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = 0; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_get_residency_gfxoff(smu, value); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u64 *value) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret = 0; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_get_entrycount_gfxoff(smu, value); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value) { struct smu_context *smu = adev->powerplay.pp_handle; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 65624d091ed2d..cb5b9df78b4db 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev, int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev); int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum smu_event_type event, uint64_t event_arg); +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value); +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value); +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u64 *value); int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value); uint64_t 
amdgpu_dpm_get_thermal_throttling_counter(struct amdgpu_device *adev); void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 7510d470b8643..55b7910b43852 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void *handle, return smu_set_pp_feature_mask(smu, new_mask); } +int smu_set_residency_gfxoff(struct smu_context *smu, bool value) +{ + if (!smu->ppt_funcs->set_gfx_off_residency) + return -EINVAL; + + return smu_set_gfx_off_residency(smu, value); +} + +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value) +{ + if (!smu->ppt_funcs->get_gfx_off_residency) + return -EINVAL; + + return smu_get_gfx_off_residency(smu, value); +} + +int smu_get_entrycount_gfxoff(struct smu_context *smu, u64 *value) +{ + if (!smu->ppt_funcs->get_gfx_off_entrycount) + return -EINVAL; + + return smu_get_gfx_off_entrycount(smu, value); +} + int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value) { if (!smu->ppt_funcs->get_gfx_off_status) @@ -1576,6 +1600,7 @@ static int smu_suspend(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = adev->powerplay.pp_handle; int ret; + uint64_t count; if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev)) return 0; @@ -1593,6 +1618,14 @@ static int smu_suspend(void *handle) smu_set_gfx_cgpg(smu, false); + /* + * pwfw resets entrycount when device is suspended, so we save the + * last value to be used when we resume to keep it consistent + */ + ret = smu_get_entrycount_gfxoff(smu, &count); + if (!ret) + adev->gfx.gfx_off_entrycount = count; + return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index b81c657c73860..e2fa3b066b968 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1111,6 +1111,22 @@ struct pptable_funcs { */ uint32_t (*get_gfx_off_status)(struct smu_context *smu); + /** + * @gfx_off_entrycount: total GFXOFF entry count at the time of + * query since system power-up + */ + u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint64_t *entrycount); + + /** + * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging + */ + u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start); + + /** + * @get_gfx_off_residency: Average GFXOFF residency % during the logging interval + */ + u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t *residency); + /** * @register_irq_handler: Register interupt request handlers. 
*/ @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu); int smu_allow_xgmi_power_down(struct smu_context *smu, bool en); +int smu_get_entrycount_gfxoff(struct smu_context *smu, u64 *value); + +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value); + +int smu_set_residency_gfxoff(struct smu_context *smu, bool value); + int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value); int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h index fe130a497d6c3..7471e2df28285 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v11_5_ppsmc.h @@ -108,7 +108,10 @@ #define PPSMC_MSG_SetSlowPPTLimit 0x4A #define PPSMC_MSG_GetFastPPTLimit 0x4B #define PPSMC_MSG_GetSlowPPTLimit 0x4C -#define PPSMC_Message_Count 0x4D +#define PPSMC_MSG_GetGfxOffStatus 0x50 +#define PPSMC_MSG_GetGfxOffEntryCount 0x51 +#define PPSMC_MSG_LogGfxOffResidency 0x52 +#define PPSMC_Message_Count 0x53 //Argument for PPSMC_MSG_GfxDeviceDriverReset enum { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 28f6a1eb6945c..58098b82df660 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -236,7 +236,10 @@ __SMU_DUMMY_MAP(HeavySBR), \ __SMU_DUMMY_MAP(SetBadHBMPagesRetiredFlagsPerChannel), \ __SMU_DUMMY_MAP(EnableGfxImu), \ - __SMU_DUMMY_MAP(DriverMode2Reset), + __SMU_DUMMY_MAP(DriverMode2Reset), \ + __SMU_DUMMY_MAP(GetGfxOffStatus), \ + __SMU_DUMMY_MAP(GetGfxOffEntryCount), \ + __SMU_DUMMY_MAP(LogGfxOffResidency), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index c02e5e5767282..6fe2fe92ebd75 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -28,7 +28,7 @@ #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x04 +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x05 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x2C #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 89504ff8e9ed7..847990145dcd9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -138,6 +138,9 @@ static struct cmn2asic_msg_mapping vangogh_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(SetSlowPPTLimit, PPSMC_MSG_SetSlowPPTLimit, 0), MSG_MAP(GetFastPPTLimit, PPSMC_MSG_GetFastPPTLimit, 0), MSG_MAP(GetSlowPPTLimit, PPSMC_MSG_GetSlowPPTLimit, 0), + MSG_MAP(GetGfxOffStatus, PPSMC_MSG_GetGfxOffStatus, 0), + MSG_MAP(GetGfxOffEntryCount, PPSMC_MSG_GetGfxOffEntryCount, 0), + MSG_MAP(LogGfxOffResidency, PPSMC_MSG_LogGfxOffResidency, 0), }; static struct cmn2asic_mapping vangogh_feature_mask_map[SMU_FEATURE_COUNT] = { @@ -2200,6 +2203,76 @@ static int vangogh_set_power_limit(struct smu_context *smu, return ret; } +/** + * vangogh_set_gfxoff_residency + * + * @smu: amdgpu_device pointer + * @start: start/stop residency log + * + * This function will be used to log gfxoff residency + * + 
* + * Returns standard response codes. + */ +static u32 vangogh_set_gfxoff_residency(struct smu_context *smu, bool start) +{ + int ret = 0; + u32 residency; + struct amdgpu_device *adev = smu->adev; + + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_LogGfxOffResidency, + start, &residency); + + if (!start) + adev->gfx.gfx_off_residency = residency; + + return ret; +} + +/** + * vangogh_get_gfxoff_residency + * + * @smu: amdgpu_device pointer + * + * This function will be used to get gfxoff residency. + * + * Returns standard response codes. + */ +static u32 vangogh_get_gfxoff_residency(struct smu_context *smu, uint32_t *residency) +{ + struct amdgpu_device *adev = smu->adev; + + *residency = adev->gfx.gfx_off_residency; + + return 0; +} + +/** + * vangogh_get_gfxoff_entrycount - get gfxoff entry count + * + * @smu: amdgpu_device pointer + * + * This function will be used to get gfxoff entry count + * + * Returns standard response codes. + */ +static u32 vangogh_get_gfxoff_entrycount(struct smu_context *smu, uint64_t *entrycount) +{ + int ret = 0, value = 0; + struct amdgpu_device *adev = smu->adev; + + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return 0; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetGfxOffEntryCount, &value); + *entrycount = value + adev->gfx.gfx_off_entrycount; + + return ret; +} + static const struct pptable_funcs vangogh_ppt_funcs = { .check_fw_status = smu_v11_0_check_fw_status, @@ -2237,6 +2310,9 @@ static const struct pptable_funcs vangogh_ppt_funcs = { .mode2_reset = vangogh_mode2_reset, .gfx_off_control = smu_v11_0_gfx_off_control, .get_gfx_off_status = vangogh_get_gfxoff_status, + .get_gfx_off_entrycount = vangogh_get_gfxoff_entrycount, + .get_gfx_off_residency = vangogh_get_gfxoff_residency, + .set_gfx_off_residency = vangogh_set_gfxoff_residency, .get_ppt_limit = vangogh_get_ppt_limit, .get_power_limit = vangogh_get_power_limit, .set_power_limit = vangogh_set_power_limit, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index e56ec06012ddc..3651f6f750680 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2345,8 +2345,8 @@ int smu_v13_0_set_gfx_power_up_by_imu(struct smu_context *smu) index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG, SMU_MSG_EnableGfxImu); - - return smu_cmn_send_msg_without_waiting(smu, index, 0); + /* Param 1 to tell PMFW to enable GFXOFF feature */ + return smu_cmn_send_msg_without_waiting(smu, index, 1); } int smu_v13_0_od_edit_dpm_table(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h index 7469bbfce1fb0..ceb13c8380679 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h @@ -47,6 +47,9 @@ #define smu_notify_memory_pool_location(smu) smu_ppt_funcs(notify_memory_pool_location, 0, smu) #define smu_gfx_off_control(smu, enable) smu_ppt_funcs(gfx_off_control, 0, smu, enable) #define smu_get_gfx_off_status(smu) smu_ppt_funcs(get_gfx_off_status, 0, smu) +#define smu_get_gfx_off_entrycount(smu, value) smu_ppt_funcs(get_gfx_off_entrycount, 0, smu, value) +#define smu_get_gfx_off_residency(smu, value) smu_ppt_funcs(get_gfx_off_residency, 0, smu, value) +#define smu_set_gfx_off_residency(smu, value) smu_ppt_funcs(set_gfx_off_residency, 0, smu, value) #define smu_set_last_dcef_min_deep_sleep_clk(smu) 
smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu) #define smu_system_features_control(smu, en) smu_ppt_funcs(system_features_control, 0, smu, en) #define smu_init_max_sustainable_clocks(smu) smu_ppt_funcs(init_max_sustainable_clocks, 0, smu) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 22dcdef082cc5..1197ecfc4e8c8 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -80,7 +80,7 @@ static int ttm_global_init(void) si_meminfo(&si); - ttm_debugfs_root = debugfs_create_dir("ttm", NULL); + ttm_debugfs_root = debugfs_create_dir(TTM_NAME, NULL); if (IS_ERR(ttm_debugfs_root)) { ttm_debugfs_root = NULL; } diff --git a/include/kcl/kcl_cpumask.h b/include/kcl/kcl_cpumask.h new file mode 100644 index 0000000000000..aee779d6ec5f2 --- /dev/null +++ b/include/kcl/kcl_cpumask.h @@ -0,0 +1,43 @@ +/*SPDX-License-Identifier: GPL-2.0*/ + +#include +#include +#include +#include +#include + +#ifndef for_each_cpu_wrap + +extern int _kcl_cpumask_next_wrap(int n, const struct cpumask *mask, + int start, bool wrap); + +static inline +int cpumask_next_wrap(int n, const struct cpumask *mask, + int start, bool wrap) +{ +return _kcl_cpumask_next_wrap(n, mask, start, wrap); +} + +/* Copied from include/linux/cpumask.h */ +#if NR_CPUS == 1 +#define for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start)) +#else +/** + * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * @start: the start location + * + * The implementation does not assume any bit in @mask is set (including @start). + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \ + (cpu) < nr_cpumask_bits; \ + (cpu) = cpumask_next_wrap((cpu), (mask), (start), true)) + +#endif +#endif + diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index d247f1e8550a2..b9b688183f1db 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -1076,37 +1076,6 @@ struct kfd_ioctl_ipc_import_handle_args { __u32 flags; /* from KFD */ }; -struct kfd_memory_range { - __u64 va_addr; - __u64 size; -}; - -/* flags definitions - * BIT0: 0: read operation, 1: write operation. 
- * This also identifies if the src or dst array belongs to remote process - */ -#define KFD_CROSS_MEMORY_RW_BIT (1 << 0) -#define KFD_SET_CROSS_MEMORY_READ(flags) (flags &= ~KFD_CROSS_MEMORY_RW_BIT) -#define KFD_SET_CROSS_MEMORY_WRITE(flags) (flags |= KFD_CROSS_MEMORY_RW_BIT) -#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT) - -struct kfd_ioctl_cross_memory_copy_args { - /* to KFD: Process ID of the remote process */ - __u32 pid; - /* to KFD: See above definition */ - __u32 flags; - /* to KFD: Source GPU VM range */ - __u64 src_mem_range_array; - /* to KFD: Size of above array */ - __u64 src_mem_array_size; - /* to KFD: Destination GPU VM range */ - __u64 dst_mem_range_array; - /* to KFD: Size of above array */ - __u64 dst_mem_array_size; - /* from KFD: Total amount of bytes copied */ - __u64 bytes_copied; -}; - /* Guarantee host access to memory */ #define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001 /* Fine grained coherency between all devices with access */ @@ -1395,9 +1364,6 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_IOC_DBG_TRAP \ AMDKFD_IOWR(0x82, struct kfd_ioctl_dbg_trap_args) -#define AMDKFD_IOC_CROSS_MEMORY_COPY \ - AMDKFD_IOWR(0x83, struct kfd_ioctl_cross_memory_copy_args) - #define AMDKFD_IOC_RLC_SPM \ AMDKFD_IOWR(0x84, struct kfd_ioctl_spm_args) @@ -1405,5 +1371,4 @@ struct kfd_ioctl_set_xnack_mode_args { #define AMDKFD_COMMAND_START_2 0x80 #define AMDKFD_COMMAND_END_2 0x85 - #endif diff --git a/kernel/fork.c b/kernel/fork.c index 518fba1f3abac..1e9ecaa4f5ac5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1401,7 +1401,6 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) return mm; } -EXPORT_SYMBOL_GPL(mm_access); static void complete_vfork_done(struct task_struct *tsk) {
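For reference, below is a minimal sketch of how driver code could drive the three helpers this patch introduces (amdgpu_dpm_get_entrycount_gfxoff(), amdgpu_dpm_set_residency_gfxoff() and amdgpu_dpm_get_residency_gfxoff()). It is an illustration only, not part of the patch: the function name example_log_gfxoff_stats() and the one-second sampling window are invented for the example, and the patch's real consumers are not shown in this excerpt. The call chain it exercises is the one added above: dpm wrapper -> smu_*_gfxoff() wrapper -> the ppt_funcs callbacks, which are only populated for vangogh here::

    /* Illustrative sketch only -- not part of the patch above. */
    #include <linux/delay.h>
    #include <linux/errno.h>
    #include <linux/types.h>

    #include "amdgpu.h"
    #include "amdgpu_dpm.h"

    static int example_log_gfxoff_stats(struct amdgpu_device *adev)
    {
            u64 count;
            u32 residency;
            int r;

            /*
             * Entry count accumulates since power-up; the PMFW resets it
             * when the device is suspended, so smu_suspend() above caches
             * the last value in adev->gfx.gfx_off_entrycount and the
             * vangogh callback adds it back in.
             */
            r = amdgpu_dpm_get_entrycount_gfxoff(adev, &count);
            if (r)
                    return r;

            /* Start a residency logging interval, wait, then stop it. */
            r = amdgpu_dpm_set_residency_gfxoff(adev, true);
            if (r)
                    return r;
            msleep(1000);   /* arbitrary window, for the example only */
            r = amdgpu_dpm_set_residency_gfxoff(adev, false);
            if (r)
                    return r;

            /* Stopping the log updates the cached residency value. */
            r = amdgpu_dpm_get_residency_gfxoff(adev, &residency);
            if (r)
                    return r;

            dev_info(adev->dev, "gfxoff entries %llu, residency %u\n",
                     count, residency);
            return 0;
    }

Note that the new dpm wrappers already take adev->pm.mutex and return -EOPNOTSUPP on parts without SW SMU support (and the smu_*_gfxoff() wrappers return -EINVAL when the vangogh-only callbacks are absent), so a caller like this only needs to check the return codes.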