From c4eb2f88d2796ab90c5430e11c48709716181364 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Fri, 25 Apr 2025 11:28:22 +0200 Subject: [PATCH 01/32] accel/ivpu: Increase state dump msg timeout Increase JMS message state dump command timeout to 100 ms. On some platforms, the FW may take a bit longer than 50 ms to dump its state to the log buffer and we don't want to miss any debug info during TDR. Fixes: 5e162f872d7a ("accel/ivpu: Add FW state dump on TDR") Cc: stable@vger.kernel.org # v6.13+ Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250425092822.2194465-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c index ec9a3629da3a..633160470c93 100644 --- a/drivers/accel/ivpu/ivpu_hw.c +++ b/drivers/accel/ivpu/ivpu_hw.c @@ -119,7 +119,7 @@ static void timeouts_init(struct ivpu_device *vdev) else vdev->timeout.autosuspend = 100; vdev->timeout.d0i3_entry_msg = 5; - vdev->timeout.state_dump_msg = 10; + vdev->timeout.state_dump_msg = 100; } } From f2ecc700d1ef53127c14a3463442a8ac2a6a5cf5 Mon Sep 17 00:00:00 2001 From: Jacek Lawrynowicz Date: Fri, 25 Apr 2025 11:33:40 +0200 Subject: [PATCH 02/32] accel/ivpu: Fix pm related deadlocks in cmdq ioctls Fix deadlocks in ivpu_cmdq_create_ioctl() and ivpu_cmdq_destroy_ioctl() related to runtime suspend. Runtime suspend acquires file_priv->lock mutex by calling ivpu_cmdq_reset_all_contexts(). The same lock is acquired in the cmdq ioctls. If one of the cmdq ioctls is called while runtime suspend is in progress, it can lead to a deadlock. Call stacks from example deadlock below. Runtime suspend thread: [ 3443.179717] Call Trace: [ 3443.179724] __schedule+0x4b6/0x16b0 [ 3443.179732] ? __mod_timer+0x27d/0x3a0 [ 3443.179738] schedule+0x2f/0x140 [ 3443.179741] schedule_preempt_disabled+0x19/0x30 [ 3443.179743] __mutex_lock.constprop.0+0x335/0x7d0 [ 3443.179745] ? xas_find+0x1ed/0x260 [ 3443.179747] ? xa_find+0x8e/0xf0 [ 3443.179749] __mutex_lock_slowpath+0x13/0x20 [ 3443.179751] mutex_lock+0x41/0x60 [ 3443.179757] ivpu_cmdq_reset_all_contexts+0x82/0x150 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.179786] ivpu_pm_runtime_suspend_cb+0x1f1/0x3f0 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.179850] pci_pm_runtime_suspend+0x6e/0x1f0 [ 3443.179870] ? __pfx_pci_pm_runtime_suspend+0x10/0x10 [ 3443.179886] __rpm_callback+0x48/0x130 [ 3443.179899] rpm_callback+0x64/0x70 [ 3443.179911] rpm_suspend+0x12c/0x630 [ 3443.179922] ? __schedule+0x4be/0x16b0 [ 3443.179941] pm_runtime_work+0xca/0xf0 [ 3443.179955] process_one_work+0x188/0x3d0 [ 3443.179971] worker_thread+0x2b9/0x3c0 [ 3443.179984] kthread+0xfb/0x220 [ 3443.180001] ? __pfx_worker_thread+0x10/0x10 [ 3443.180013] ? __pfx_kthread+0x10/0x10 [ 3443.180029] ret_from_fork+0x47/0x70 [ 3443.180044] ? __pfx_kthread+0x10/0x10 [ 3443.180059] ret_from_fork_asm+0x1a/0x30 User space thread: [ 3443.180128] Call Trace: [ 3443.180138] __schedule+0x4b6/0x16b0 [ 3443.180159] schedule+0x2f/0x140 [ 3443.180163] rpm_resume+0x1a7/0x6a0 [ 3443.180165] ? __pfx_autoremove_wake_function+0x10/0x10 [ 3443.180169] __pm_runtime_resume+0x56/0x90 [ 3443.180171] ivpu_rpm_get+0x28/0xb0 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180181] ivpu_ipc_send_receive+0x6d/0x120 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180193] ? free_frozen_pages+0x395/0x670 [ 3443.180199] ? __free_pages+0xa7/0xc0 [ 3443.180202] ivpu_jsm_hws_destroy_cmdq+0x76/0xf0 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180213] ? locks_dispose_list+0x6c/0xa0 [ 3443.180219] ? kmem_cache_free+0x342/0x470 [ 3443.180222] ? vm_area_free+0x19/0x30 [ 3443.180225] ? xas_load+0x17/0xf0 [ 3443.180229] ? xa_load+0x72/0xb0 [ 3443.180230] ivpu_cmdq_unregister.isra.0+0xb1/0x100 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180241] ivpu_cmdq_destroy_ioctl+0x8d/0x130 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180251] ? __pfx_ivpu_cmdq_destroy_ioctl+0x10/0x10 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180260] drm_ioctl_kernel+0xb3/0x110 [ 3443.180265] drm_ioctl+0x2ca/0x580 [ 3443.180266] ? __pfx_ivpu_cmdq_destroy_ioctl+0x10/0x10 [intel_vpu a9bd091a97f28f0235f161316b29f8234f437295] [ 3443.180275] ? __fput+0x1ae/0x2f0 [ 3443.180279] ? kmem_cache_free+0x342/0x470 [ 3443.180282] __x64_sys_ioctl+0xa9/0xe0 [ 3443.180286] x64_sys_call+0x13b7/0x26f0 [ 3443.180289] do_syscall_64+0x62/0x180 [ 3443.180291] entry_SYSCALL_64_after_hwframe+0x71/0x79 Fixes: 465a3914b254 ("accel/ivpu: Add API for command queue create/destroy/submit") Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250425093341.2202895-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 863e3cd6ace5..e17b3deda201 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -874,15 +874,21 @@ int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file * int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; struct drm_ivpu_cmdq_create *args = data; struct ivpu_cmdq *cmdq; + int ret; - if (!ivpu_is_capable(file_priv->vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) + if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) return -ENODEV; if (args->priority > DRM_IVPU_JOB_PRIORITY_REALTIME) return -EINVAL; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); cmdq = ivpu_cmdq_create(file_priv, ivpu_job_to_jsm_priority(args->priority), false); @@ -891,6 +897,8 @@ int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file * mutex_unlock(&file_priv->lock); + ivpu_rpm_put(vdev); + return cmdq ? 0 : -ENOMEM; } @@ -900,28 +908,35 @@ int ivpu_cmdq_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file struct ivpu_device *vdev = file_priv->vdev; struct drm_ivpu_cmdq_destroy *args = data; struct ivpu_cmdq *cmdq; - u32 cmdq_id; + u32 cmdq_id = 0; int ret; if (!ivpu_is_capable(vdev, DRM_IVPU_CAP_MANAGE_CMDQ)) return -ENODEV; + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + mutex_lock(&file_priv->lock); cmdq = xa_load(&file_priv->cmdq_xa, args->cmdq_id); if (!cmdq || cmdq->is_legacy) { ret = -ENOENT; - goto err_unlock; + } else { + cmdq_id = cmdq->id; + ivpu_cmdq_destroy(file_priv, cmdq); + ret = 0; } - cmdq_id = cmdq->id; - ivpu_cmdq_destroy(file_priv, cmdq); mutex_unlock(&file_priv->lock); - ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); - return 0; -err_unlock: - mutex_unlock(&file_priv->lock); + /* Abort any pending jobs only if cmdq was destroyed */ + if (!ret) + ivpu_cmdq_abort_all_jobs(vdev, file_priv->ctx.id, cmdq_id); + + ivpu_rpm_put(vdev); + return ret; } From 75680b7cd461b169c7ccd2a0fba7542868b7fce2 Mon Sep 17 00:00:00 2001 From: Karol Wachowski Date: Fri, 25 Apr 2025 11:36:56 +0200 Subject: [PATCH 03/32] accel/ivpu: Correct mutex unlock order in job submission The mutex unlock for vdev->submitted_jobs_lock was incorrectly placed before unlocking file_priv->lock. Change order of unlocks to avoid potential race conditions. Fixes: 5bbccadaf33e ("accel/ivpu: Abort all jobs after command queue unregister") Signed-off-by: Karol Wachowski Reviewed-by: Jeff Hugo Signed-off-by: Jacek Lawrynowicz Link: https://lore.kernel.org/r/20250425093656.2228168-1-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/ivpu_job.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index e17b3deda201..b28da35c30b6 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -681,8 +681,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id) err_erase_xa: xa_erase(&vdev->submitted_jobs_xa, job->job_id); err_unlock: - mutex_unlock(&vdev->submitted_jobs_lock); mutex_unlock(&file_priv->lock); + mutex_unlock(&vdev->submitted_jobs_lock); ivpu_rpm_put(vdev); return ret; } From b662b162c3d06f120749eea0351ec9317d9dd905 Mon Sep 17 00:00:00 2001 From: Feng Jiang Date: Wed, 9 Apr 2025 09:46:33 +0800 Subject: [PATCH 04/32] drm: Fix potential overflow issue in event_string array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When calling scnprintf() to append recovery method to event_string, the second argument should be `sizeof(event_string) - len`, otherwise there is a potential overflow problem. Fixes: b7cf9f4ac1b8 ("drm: Introduce device wedged event") Signed-off-by: Feng Jiang Reviewed-by: André Almeida Reviewed-by: Raag Jadav Link: https://lore.kernel.org/r/20250409014633.31303-1-jiangfeng@kylinos.cn Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/drm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 17fc5dc708f4..60e5ac179c15 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -549,7 +549,7 @@ int drm_dev_wedged_event(struct drm_device *dev, unsigned long method) if (drm_WARN_ONCE(dev, !recovery, "invalid recovery method %u\n", opt)) break; - len += scnprintf(event_string + len, sizeof(event_string), "%s,", recovery); + len += scnprintf(event_string + len, sizeof(event_string) - len, "%s,", recovery); } if (recovery) From 35e4079bf1a2570abffce6ababa631afcf8ea0e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Wed, 30 Apr 2025 17:51:52 -0300 Subject: [PATCH 05/32] drm/v3d: Add job to pending list if the reset was skipped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CL/CSD job times out, we check if the GPU has made any progress since the last timeout. If so, instead of resetting the hardware, we skip the reset and let the timer get rearmed. This gives long-running jobs a chance to complete. However, when `timedout_job()` is called, the job in question is removed from the pending list, which means it won't be automatically freed through `free_job()`. Consequently, when we skip the reset and keep the job running, the job won't be freed when it finally completes. This situation leads to a memory leak, as exposed in [1] and [2]. Similarly to commit 704d3d60fec4 ("drm/etnaviv: don't block scheduler when GPU is still active"), this patch ensures the job is put back on the pending list when extending the timeout. Cc: stable@vger.kernel.org # 6.0 Reported-by: Daivik Bhatia Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12227 [1] Closes: https://github.com/raspberrypi/linux/issues/6817 [2] Reviewed-by: Iago Toral Quiroga Acked-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250430210643.57924-1-mcanal@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_sched.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 4a7701a33cf8..eb35482f6fb5 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -744,11 +744,16 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) return DRM_GPU_SCHED_STAT_NOMINAL; } -/* If the current address or return address have changed, then the GPU - * has probably made progress and we should delay the reset. This - * could fail if the GPU got in an infinite loop in the CL, but that - * is pretty unlikely outside of an i-g-t testcase. - */ +static void +v3d_sched_skip_reset(struct drm_sched_job *sched_job) +{ + struct drm_gpu_scheduler *sched = sched_job->sched; + + spin_lock(&sched->job_list_lock); + list_add(&sched_job->list, &sched->pending_list); + spin_unlock(&sched->job_list_lock); +} + static enum drm_gpu_sched_stat v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 *timedout_ctca, u32 *timedout_ctra) @@ -758,9 +763,16 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q)); u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q)); + /* If the current address or return address have changed, then the GPU + * has probably made progress and we should delay the reset. This + * could fail if the GPU got in an infinite loop in the CL, but that + * is pretty unlikely outside of an i-g-t testcase. + */ if (*timedout_ctca != ctca || *timedout_ctra != ctra) { *timedout_ctca = ctca; *timedout_ctra = ctra; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -800,11 +812,13 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4(v3d->ver)); - /* If we've made progress, skip reset and let the timer get - * rearmed. + /* If we've made progress, skip reset, add the job to the pending + * list, and let the timer get rearmed. */ if (job->timedout_batches != batches) { job->timedout_batches = batches; + + v3d_sched_skip_reset(sched_job); return DRM_GPU_SCHED_STAT_NOMINAL; } From 936b73feab5cd7eae8fe3d08a7ac9b1f8ac68042 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Mon, 28 Apr 2025 11:35:55 -0700 Subject: [PATCH 06/32] drm/i915/slpc: Balance the inc/dec for num_waiters As seen in some recent failures, SLPC num_waiters value is < 0. This happens because the inc/dec are not balanced. We should skip decrement for the same conditions as the increment. Currently, we do that for power saving profile mode. This patch also ensures that num_waiters is incremented in the case min_softlimit is at boost freq. It ensures that we don't reduce the frequency while this request is in flight. v2: Add Fixes tags Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13598 Fixes: f864a29afc32 ("drm/i915/slpc: Optmize waitboost for SLPC") Fixes: 4a82ceb04ad4 ("drm/i915/slpc: Add sysfs for SLPC power profiles") Cc: Sk Anirban Reviewed-by: Sk Anirban Signed-off-by: Vinay Belgaumkar Signed-off-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20250428183555.3250021-1-vinay.belgaumkar@intel.com (cherry picked from commit d26e55085f4b7a63677670db827541209257b313) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_rps.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 64e9317f58fb..71ee01d9ef64 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1001,6 +1001,10 @@ void intel_rps_dec_waiters(struct intel_rps *rps) if (rps_uses_slpc(rps)) { slpc = rps_to_slpc(rps); + /* Don't decrement num_waiters for req where increment was skipped */ + if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) + return; + intel_guc_slpc_dec_waiters(slpc); } else { atomic_dec(&rps->num_waiters); @@ -1029,11 +1033,15 @@ void intel_rps_boost(struct i915_request *rq) if (slpc->power_profile == SLPC_POWER_PROFILES_POWER_SAVING) return; - if (slpc->min_freq_softlimit >= slpc->boost_freq) - return; - /* Return if old value is non zero */ if (!atomic_fetch_inc(&slpc->num_waiters)) { + /* + * Skip queuing boost work if frequency is already boosted, + * but still increment num_waiters. + */ + if (slpc->min_freq_softlimit >= slpc->boost_freq) + return; + GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); queue_work(rps_to_gt(rps)->i915->unordered_wq, From 2bb04ea9e5b7a2ef583c042ed5f8111804606e9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 2 May 2025 15:01:01 +0200 Subject: [PATCH 07/32] drm/ttm: Fix ttm_backup kerneldoc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docs were not properly updated from an earlier version of the code. Fixes: e7b5d23e5d47 ("drm/ttm: Provide a shmem backup implementation") Cc: Christian König Cc: Matthew Brost Cc: Matthew Auld Cc: dri-devel@lists.freedesktop.org Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250502130101.3185-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_backup.c | 2 +- include/drm/ttm/ttm_backup.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c index 93c007f18855..f58e7393888f 100644 --- a/drivers/gpu/drm/ttm/ttm_backup.c +++ b/drivers/gpu/drm/ttm/ttm_backup.c @@ -55,7 +55,7 @@ void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) * @backup: The struct backup pointer used to back up the page. * @dst: The struct page to copy into. * @handle: The handle returned when the page was backed up. - * @intr: Try to perform waits interruptable or at least killable. + * @intr: Try to perform waits interruptible or at least killable. * * Return: 0 on success, Negative error code on failure, notably * -EINTR if @intr was set to true and a signal is pending. diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h index 24ad120b8827..574b932177cc 100644 --- a/include/drm/ttm/ttm_backup.h +++ b/include/drm/ttm/ttm_backup.h @@ -16,7 +16,7 @@ struct ttm_backup; * @handle: The handle to convert. * * Converts an opaque handle received from the - * struct ttm_backoup_ops::backup_page() function to an (invalid) + * ttm_backup_backup_page() function to an (invalid) * struct page pointer suitable for a struct page array. * * Return: An (invalid) struct page pointer. @@ -45,8 +45,8 @@ static inline bool ttm_backup_page_ptr_is_handle(const struct page *page) * * Return: The handle that was previously used in * ttm_backup_handle_to_page_ptr() to obtain a struct page pointer, suitable - * for use as argument in the struct ttm_backup_ops drop() or - * copy_backed_up_page() functions. + * for use as argument in the struct ttm_backup_drop() or + * ttm_backup_copy_page() functions. */ static inline unsigned long ttm_backup_page_ptr_to_handle(const struct page *page) From d4ad53adfe21df1464bae5ed916085a69d6ffb3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 2 May 2025 15:00:14 +0200 Subject: [PATCH 08/32] drm/ttm: Remove the struct ttm_backup abstraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The abstraction was previously added to support separate ttm_backup implementations. However with the current implementation casting from a struct file to a struct ttm_backup, we run into trouble since struct file may have randomized the layout and gcc complains. Remove the struct ttm_backup abstraction Cc: dri-devel@lists.freedesktop.org Cc: Matthew Brost Cc: Dave Airlie Cc: Christian König Cc: Matthew Auld Cc: Al Viro Reported-by: Kees Cook Closes: https://lore.kernel.org/dri-devel/9c8dbbafdaf9f3f089da2cde5a772d69579b3795.camel@linux.intel.com/T/#mb153ab9216cb813b92bdeb36f391ad4808c2ba29 Suggested-by: Christian König Fixes: 70d645deac98 ("drm/ttm: Add helpers for shrinking") Signed-off-by: Thomas Hellström Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250502130014.3156-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/ttm/ttm_backup.c | 42 +++++++++----------------------- drivers/gpu/drm/ttm/ttm_pool.c | 6 ++--- drivers/gpu/drm/ttm/ttm_tt.c | 2 +- include/drm/ttm/ttm_backup.h | 12 ++++----- include/drm/ttm/ttm_tt.h | 2 +- 5 files changed, 21 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_backup.c b/drivers/gpu/drm/ttm/ttm_backup.c index f58e7393888f..9e2d72c447ee 100644 --- a/drivers/gpu/drm/ttm/ttm_backup.c +++ b/drivers/gpu/drm/ttm/ttm_backup.c @@ -7,20 +7,6 @@ #include #include -/* - * Casting from randomized struct file * to struct ttm_backup * is fine since - * struct ttm_backup is never defined nor dereferenced. - */ -static struct file *ttm_backup_to_file(struct ttm_backup *backup) -{ - return (void *)backup; -} - -static struct ttm_backup *ttm_file_to_backup(struct file *file) -{ - return (void *)file; -} - /* * Need to map shmem indices to handle since a handle value * of 0 means error, following the swp_entry_t convention. @@ -40,12 +26,12 @@ static pgoff_t ttm_backup_handle_to_shmem_idx(pgoff_t handle) * @backup: The struct backup pointer used to obtain the handle * @handle: The handle obtained from the @backup_page function. */ -void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) +void ttm_backup_drop(struct file *backup, pgoff_t handle) { loff_t start = ttm_backup_handle_to_shmem_idx(handle); start <<= PAGE_SHIFT; - shmem_truncate_range(file_inode(ttm_backup_to_file(backup)), start, + shmem_truncate_range(file_inode(backup), start, start + PAGE_SIZE - 1); } @@ -60,11 +46,10 @@ void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle) * Return: 0 on success, Negative error code on failure, notably * -EINTR if @intr was set to true and a signal is pending. */ -int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, +int ttm_backup_copy_page(struct file *backup, struct page *dst, pgoff_t handle, bool intr) { - struct file *filp = ttm_backup_to_file(backup); - struct address_space *mapping = filp->f_mapping; + struct address_space *mapping = backup->f_mapping; struct folio *from_folio; pgoff_t idx = ttm_backup_handle_to_shmem_idx(handle); @@ -106,12 +91,11 @@ int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, * the folio size- and usage. */ s64 -ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, +ttm_backup_backup_page(struct file *backup, struct page *page, bool writeback, pgoff_t idx, gfp_t page_gfp, gfp_t alloc_gfp) { - struct file *filp = ttm_backup_to_file(backup); - struct address_space *mapping = filp->f_mapping; + struct address_space *mapping = backup->f_mapping; unsigned long handle = 0; struct folio *to_folio; int ret; @@ -161,9 +145,9 @@ ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, * * After a call to this function, it's illegal to use the @backup pointer. */ -void ttm_backup_fini(struct ttm_backup *backup) +void ttm_backup_fini(struct file *backup) { - fput(ttm_backup_to_file(backup)); + fput(backup); } /** @@ -194,14 +178,10 @@ EXPORT_SYMBOL_GPL(ttm_backup_bytes_avail); * * Create a backup utilizing shmem objects. * - * Return: A pointer to a struct ttm_backup on success, + * Return: A pointer to a struct file on success, * an error pointer on error. */ -struct ttm_backup *ttm_backup_shmem_create(loff_t size) +struct file *ttm_backup_shmem_create(loff_t size) { - struct file *filp; - - filp = shmem_file_setup("ttm shmem backup", size, 0); - - return ttm_file_to_backup(filp); + return shmem_file_setup("ttm shmem backup", size, 0); } diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 83b10706ba89..c2ea865be657 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -506,7 +506,7 @@ static void ttm_pool_allocated_page_commit(struct page *allocated, * if successful, populate the page-table and dma-address arrays. */ static int ttm_pool_restore_commit(struct ttm_pool_tt_restore *restore, - struct ttm_backup *backup, + struct file *backup, const struct ttm_operation_ctx *ctx, struct ttm_pool_alloc_state *alloc) @@ -655,7 +655,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pgoff_t start_page, pgoff_t end_page) { struct page **pages = &tt->pages[start_page]; - struct ttm_backup *backup = tt->backup; + struct file *backup = tt->backup; pgoff_t i, nr; for (i = start_page; i < end_page; i += nr, pages += nr) { @@ -963,7 +963,7 @@ void ttm_pool_drop_backed_up(struct ttm_tt *tt) long ttm_pool_backup(struct ttm_pool *pool, struct ttm_tt *tt, const struct ttm_backup_flags *flags) { - struct ttm_backup *backup = tt->backup; + struct file *backup = tt->backup; struct page *page; unsigned long handle; gfp_t alloc_gfp; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index df0aa6c4b8b8..698cd4bf5e46 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL(ttm_tt_pages_limit); */ int ttm_tt_setup_backup(struct ttm_tt *tt) { - struct ttm_backup *backup = + struct file *backup = ttm_backup_shmem_create(((loff_t)tt->num_pages) << PAGE_SHIFT); if (WARN_ON_ONCE(!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))) diff --git a/include/drm/ttm/ttm_backup.h b/include/drm/ttm/ttm_backup.h index 574b932177cc..c33cba111171 100644 --- a/include/drm/ttm/ttm_backup.h +++ b/include/drm/ttm/ttm_backup.h @@ -9,8 +9,6 @@ #include #include -struct ttm_backup; - /** * ttm_backup_handle_to_page_ptr() - Convert handle to struct page pointer * @handle: The handle to convert. @@ -55,20 +53,20 @@ ttm_backup_page_ptr_to_handle(const struct page *page) return (unsigned long)page >> 1; } -void ttm_backup_drop(struct ttm_backup *backup, pgoff_t handle); +void ttm_backup_drop(struct file *backup, pgoff_t handle); -int ttm_backup_copy_page(struct ttm_backup *backup, struct page *dst, +int ttm_backup_copy_page(struct file *backup, struct page *dst, pgoff_t handle, bool intr); s64 -ttm_backup_backup_page(struct ttm_backup *backup, struct page *page, +ttm_backup_backup_page(struct file *backup, struct page *page, bool writeback, pgoff_t idx, gfp_t page_gfp, gfp_t alloc_gfp); -void ttm_backup_fini(struct ttm_backup *backup); +void ttm_backup_fini(struct file *backup); u64 ttm_backup_bytes_avail(void); -struct ttm_backup *ttm_backup_shmem_create(loff_t size); +struct file *ttm_backup_shmem_create(loff_t size); #endif diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 13cf47f3322f..406437ad674b 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -118,7 +118,7 @@ struct ttm_tt { * ttm_tt_create() callback is responsible for assigning * this field. */ - struct ttm_backup *backup; + struct file *backup; /** * @caching: The current caching state of the pages, see enum * ttm_caching. From 7c6fa1797a725732981f2d77711c867166737719 Mon Sep 17 00:00:00 2001 From: Kevin Baker Date: Mon, 5 May 2025 12:02:56 -0500 Subject: [PATCH 09/32] drm/panel: simple: Update timings for AUO G101EVN010 Switch to panel timings based on datasheet for the AUO G101EVN01.0 LVDS panel. Default timings were tested on the panel. Previous mode-based timings resulted in horizontal display shift. Signed-off-by: Kevin Baker Fixes: 4fb86404a977 ("drm/panel: simple: Add AUO G101EVN010 panel support") Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20250505170256.1385113-1-kevinb@ventureresearch.com Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250505170256.1385113-1-kevinb@ventureresearch.com --- drivers/gpu/drm/panel/panel-simple.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 232b03c1a259..33a37539de57 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -1027,27 +1027,28 @@ static const struct panel_desc auo_g070vvn01 = { }, }; -static const struct drm_display_mode auo_g101evn010_mode = { - .clock = 68930, - .hdisplay = 1280, - .hsync_start = 1280 + 82, - .hsync_end = 1280 + 82 + 2, - .htotal = 1280 + 82 + 2 + 84, - .vdisplay = 800, - .vsync_start = 800 + 8, - .vsync_end = 800 + 8 + 2, - .vtotal = 800 + 8 + 2 + 6, +static const struct display_timing auo_g101evn010_timing = { + .pixelclock = { 64000000, 68930000, 85000000 }, + .hactive = { 1280, 1280, 1280 }, + .hfront_porch = { 8, 64, 256 }, + .hback_porch = { 8, 64, 256 }, + .hsync_len = { 40, 168, 767 }, + .vactive = { 800, 800, 800 }, + .vfront_porch = { 4, 8, 100 }, + .vback_porch = { 4, 8, 100 }, + .vsync_len = { 8, 16, 223 }, }; static const struct panel_desc auo_g101evn010 = { - .modes = &auo_g101evn010_mode, - .num_modes = 1, + .timings = &auo_g101evn010_timing, + .num_timings = 1, .bpc = 6, .size = { .width = 216, .height = 135, }, .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, .connector_type = DRM_MODE_CONNECTOR_LVDS, }; From 9984db63742099ee3f3cff35cf71306d10e64356 Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 14 Apr 2025 12:56:48 -0400 Subject: [PATCH 10/32] drm/amd/display: Fix invalid context error in dml helper [Why] "BUG: sleeping function called from invalid context" error. after: "drm/amd/display: Protect FPU in dml2_validate()/dml21_validate()" The populate_dml_plane_cfg_from_plane_state() uses the GFP_KERNEL flag for memory allocation, which shouldn't be used in atomic contexts. The allocation is needed only for using another helper function get_scaler_data_for_plane(). [How] Modify helpers to pass a pointer to scaler_data within existing context, eliminating the need for dynamic memory allocation/deallocation and copying. Fixes: 366e77cd4923 ("drm/amd/display: Protect FPU in dml2_validate()/dml21_validate()") Reviewed-by: Aurabindo Pillai Signed-off-by: Roman Li Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit bd3e84bc98f81b44f2c43936bdadc3241d654259) Cc: stable@vger.kernel.org --- .../amd/display/dc/dml2/dml2_translation_helper.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 2061d43b92e1..ab6baf269801 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -973,7 +973,9 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p } } -static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc_state *context, struct scaler_data *out) +static struct scaler_data *get_scaler_data_for_plane( + const struct dc_plane_state *in, + struct dc_state *context) { int i; struct pipe_ctx *temp_pipe = &context->res_ctx.temp_pipe; @@ -994,7 +996,7 @@ static void get_scaler_data_for_plane(const struct dc_plane_state *in, struct dc } ASSERT(i < MAX_PIPES); - memcpy(out, &temp_pipe->plane_res.scl_data, sizeof(*out)); + return &temp_pipe->plane_res.scl_data; } static void populate_dummy_dml_plane_cfg(struct dml_plane_cfg_st *out, unsigned int location, @@ -1057,11 +1059,7 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out const struct dc_plane_state *in, struct dc_state *context, const struct soc_bounding_box_st *soc) { - struct scaler_data *scaler_data = kzalloc(sizeof(*scaler_data), GFP_KERNEL); - if (!scaler_data) - return; - - get_scaler_data_for_plane(in, context, scaler_data); + struct scaler_data *scaler_data = get_scaler_data_for_plane(in, context); out->CursorBPP[location] = dml_cur_32bit; out->CursorWidth[location] = 256; @@ -1126,8 +1124,6 @@ static void populate_dml_plane_cfg_from_plane_state(struct dml_plane_cfg_st *out out->DynamicMetadataTransmittedBytes[location] = 0; out->NumberOfCursors[location] = 1; - - kfree(scaler_data); } static unsigned int map_stream_to_dml_display_cfg(const struct dml2_context *dml2, From f1c6be3999d2be2673a51a9be0caf9348e254e52 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 16 Apr 2025 11:26:54 -0400 Subject: [PATCH 11/32] drm/amd/display: more liberal vmin/vmax update for freesync [Why] FAMS2 expects vmin/vmax to be updated in the case when freesync is off, but supported. But we only update it when freesync is enabled. [How] Change the vsync handler such that dc_stream_adjust_vmin_vmax() its called irrespective of whether freesync is enabled. If freesync is supported, then there is no harm in updating vmin/vmax registers. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3546 Reviewed-by: ChiaHsuan Chung Signed-off-by: Aurabindo Pillai Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit cfb2d41831ee5647a4ae0ea7c24971a92d5dfa0d) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 536f73131c2d..93dd9e273c65 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -673,15 +673,21 @@ static void dm_crtc_high_irq(void *interrupt_params) spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); if (acrtc->dm_irq_params.stream && - acrtc->dm_irq_params.vrr_params.supported && - acrtc->dm_irq_params.freesync_config.state == - VRR_STATE_ACTIVE_VARIABLE) { + acrtc->dm_irq_params.vrr_params.supported) { + bool replay_en = acrtc->dm_irq_params.stream->link->replay_settings.replay_feature_enabled; + bool psr_en = acrtc->dm_irq_params.stream->link->psr_settings.psr_feature_enabled; + bool fs_active_var_en = acrtc->dm_irq_params.freesync_config.state == VRR_STATE_ACTIVE_VARIABLE; + mod_freesync_handle_v_update(adev->dm.freesync_module, acrtc->dm_irq_params.stream, &acrtc->dm_irq_params.vrr_params); - dc_stream_adjust_vmin_vmax(adev->dm.dc, acrtc->dm_irq_params.stream, - &acrtc->dm_irq_params.vrr_params.adjust); + /* update vmin_vmax only if freesync is enabled, or only if PSR and REPLAY are disabled */ + if (fs_active_var_en || (!fs_active_var_en && !replay_en && !psr_en)) { + dc_stream_adjust_vmin_vmax(adev->dm.dc, + acrtc->dm_irq_params.stream, + &acrtc->dm_irq_params.vrr_params.adjust); + } } /* From 2a24755774ef8db33139e2d9a968cc585c6488da Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 23 Apr 2025 10:02:11 -0600 Subject: [PATCH 12/32] drm/amd/display: Remove unnecessary DC_FP_START/DC_FP_END [WHY & HOW] Remove the unnecessary DC_FP_START/DC_FP_END pair to reduce time in preempt_disable. It also fixes "BUG: sleeping function called from invalid context" error messages because of calling kzalloc with GFP_KERNEL which can sleep. Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Hung Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 94da0735b67b3ada90a3e2e017d306f070306f1d) --- .../gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 2a59cc61ed8c..944650cb13de 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2114,8 +2114,6 @@ static bool dcn32_resource_construct( #define REG_STRUCT dccg_regs dccg_regs_init(); - DC_FP_START(); - ctx->dc_bios->regs = &bios_regs; pool->base.res_cap = &res_cap_dcn32; @@ -2501,14 +2499,10 @@ static bool dcn32_resource_construct( if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0)) dc->config.sdpif_request_limit_words_per_umc = 16; - DC_FP_END(); - return true; create_fail: - DC_FP_END(); - dcn32_resource_destruct(pool); return false; From eba692ca3abca258b3214a6e4126afefad1822f0 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Thu, 17 Apr 2025 10:24:29 -0400 Subject: [PATCH 13/32] drm/amd/display: Call FP Protect Before Mode Programming/Mode Support [Why] Memory allocation occurs within dml21_validate() for adding phantom planes. May cause kernel to be tainted due to usage of FP Start. [How] Move FP start from dml21_validate to before mode programming/mode support. Calculations requiring floating point are all done within mode programming or mode support. Reviewed-by: Alvin Lee Signed-off-by: Austin Zheng Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit fe3250f10819b411808ab9ae1d824c5fc9b59170) --- drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c index 5d16f36ec95c..ed6584535e89 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_wrapper.c @@ -234,7 +234,9 @@ static bool dml21_mode_check_and_programming(const struct dc *in_dc, struct dc_s if (!result) return false; + DC_FP_START(); result = dml2_build_mode_programming(mode_programming); + DC_FP_END(); if (!result) return false; @@ -277,7 +279,9 @@ static bool dml21_check_mode_support(const struct dc *in_dc, struct dc_state *co mode_support->dml2_instance = dml_init->dml2_instance; dml21_map_dc_state_into_dml_display_cfg(in_dc, context, dml_ctx); dml_ctx->v21.mode_programming.dml2_instance->scratch.build_mode_programming_locals.mode_programming_params.programming = dml_ctx->v21.mode_programming.programming; + DC_FP_START(); is_supported = dml2_check_mode_supported(mode_support); + DC_FP_END(); if (!is_supported) return false; @@ -288,16 +292,12 @@ bool dml21_validate(const struct dc *in_dc, struct dc_state *context, struct dml { bool out = false; - DC_FP_START(); - /* Use dml_validate_only for fast_validate path */ if (fast_validate) out = dml21_check_mode_support(in_dc, context, dml_ctx); else out = dml21_mode_check_and_programming(in_dc, context, dml_ctx); - DC_FP_END(); - return out; } From 5a3846648c0523fd850b7f0aec78c0139453ab8b Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 18 Apr 2025 16:31:59 +0800 Subject: [PATCH 14/32] drm/amd/display: Shift DMUB AUX reply command if necessary [Why] Defined value of dmub AUX reply command field get updated but didn't adjust dm receiving side accordingly. [How] Check the received reply command value to see if it's updated version or not. Adjust it if necessary. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit d5c9ade755a9afa210840708a12a8f44c0d532f4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 93dd9e273c65..d9292da56948 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12758,8 +12758,11 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( goto out; } + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command & 0xF; + if (adev->dm.dmub_notify->aux_reply.command & 0xF0) + /* The reply is stored in the top nibble of the command. */ + payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; if (!payload->write && p_notify->aux_reply.length && (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { From bc70e11b550d37fbd9eaed0f113ba560894f1609 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 16:29:07 +0800 Subject: [PATCH 15/32] drm/amd/display: Fix the checking condition in dmub aux handling [Why & How] Fix the checking condition for detecting AUX_RET_ERROR_PROTOCOL_ERROR. It was wrongly checking by "not equals to" Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 1db6c9e9b62e1a8912f0a281c941099fca678da3) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d9292da56948..68bbe7d57773 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12749,7 +12749,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( * Transient states before tunneling is enabled could * lead to this error. We can ignore this for now. */ - if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { + if (p_notify->result == AUX_RET_ERROR_PROTOCOL_ERROR) { DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", payload->address, payload->length, p_notify->result); From 396dc51b3b7ea524bf8061f478332d0039e96d5d Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 16:56:54 +0800 Subject: [PATCH 16/32] drm/amd/display: Remove incorrect checking in dmub aux handler [Why & How] "Request length != reply length" is expected behavior defined in spec. It's not an invalid reply. Besides, replied data handling logic is not designed to be written in amdgpu_dm_process_dmub_aux_transfer_sync(). Remove the incorrectly handling section. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 81b5c6fa62af62fe89ae9576f41aae37830b94cb) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 68bbe7d57773..d63f219de88e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12764,19 +12764,9 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { - - if (payload->length != p_notify->aux_reply.length) { - DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", - p_notify->aux_reply.length, - payload->address, payload->length); - *operation_result = AUX_RET_ERROR_INVALID_REPLY; - goto out; - } - + (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); - } /* success */ ret = p_notify->aux_reply.length; From 3924f45d4de7250a603fd7b50379237a6a0e5adf Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 17:50:03 +0800 Subject: [PATCH 17/32] drm/amd/display: Copy AUX read reply data whenever length > 0 [Why] amdgpu_dm_process_dmub_aux_transfer_sync() should return all exact data reply from the sink side. Don't do the analysis job in it. [How] Remove unnecessary check condition AUX_TRANSACTION_REPLY_AUX_ACK. Fixes: ead08b95fa50 ("drm/amd/display: Fix race condition in DPIA AUX transfer") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 9b540e3fe6796fec4fb1344f3be8952fc2f084d4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d63f219de88e..64df8ca448b3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -12763,8 +12763,7 @@ int amdgpu_dm_process_dmub_aux_transfer_sync( /* The reply is stored in the top nibble of the command. */ payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF; - if (!payload->write && p_notify->aux_reply.length && - (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) + if (!payload->write && p_notify->aux_reply.length) memcpy(payload->data, p_notify->aux_reply.data, p_notify->aux_reply.length); From 65924ec69b29296845c7f628112353438e63ea56 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Sun, 20 Apr 2025 19:22:14 +0800 Subject: [PATCH 18/32] drm/amd/display: Fix wrong handling for AUX_DEFER case [Why] We incorrectly ack all bytes get written when the reply actually is defer. When it's defer, means sink is not ready for the request. We should retry the request. [How] Only reply all data get written when receive I2C_ACK|AUX_ACK. Otherwise, reply the number of actual written bytes received from the sink. Add some messages to facilitate debugging as well. Fixes: ad6756b4d773 ("drm/amd/display: Shift dc link aux to aux_payload") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Ray Wu Signed-off-by: Wayne Lin Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 3637e457eb0000bc37d8bbbec95964aad2fb29fd) Cc: stable@vger.kernel.org --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 7ceedf626d23..074b79fd5822 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -51,6 +51,9 @@ #define PEAK_FACTOR_X1000 1006 +/* + * This function handles both native AUX and I2C-Over-AUX transactions. + */ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { @@ -87,15 +90,25 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, if (adev->dm.aux_hpd_discon_quirk) { if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && operation_result == AUX_RET_ERROR_HPD_DISCON) { - result = 0; + result = msg->size; operation_result = AUX_RET_SUCCESS; } } - if (payload.write && result >= 0) - result = msg->size; + /* + * result equals to 0 includes the cases of AUX_DEFER/I2C_DEFER + */ + if (payload.write && result >= 0) { + if (result) { + /*one byte indicating partially written bytes. Force 0 to retry*/ + drm_info(adev_to_drm(adev), "amdgpu: AUX partially written\n"); + result = 0; + } else if (!payload.reply[0]) + /*I2C_ACK|AUX_ACK*/ + result = msg->size; + } - if (result < 0) + if (result < 0) { switch (operation_result) { case AUX_RET_SUCCESS: break; @@ -114,6 +127,13 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, break; } + drm_info(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result); + } + + if (payload.reply[0]) + drm_info(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.", + payload.reply[0]); + return result; } From b7e84fb708392b37e5dbb2a95db9b94a0e3f0aa2 Mon Sep 17 00:00:00 2001 From: Ruijing Dong Date: Fri, 2 May 2025 11:19:26 -0400 Subject: [PATCH 19/32] drm/amdgpu/vcn: using separate VCN1_AON_SOC offset VCN1_AON_SOC_ADDRESS_3_0 offset varies on different VCN generations, the issue in vcn4.0.5 is caused by a different VCN1_AON_SOC_ADDRESS_3_0 offset. This patch does the following: 1. use the same offset for other VCN generations. 2. use the vcn4.0.5 special offset 3. update vcn_4_0 and vcn_5_0 Acked-by: Saleemkhan Jamadar Reviewed-by: Leo Liu Signed-off-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit 5c89ceda9984498b28716944633a9a01cbb2c90d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 1 - drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 1 + drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 3 ++- 8 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index cdcdae7f71ce..83adf81defc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -66,7 +66,6 @@ #define VCN_ENC_CMD_REG_WAIT 0x0000000c #define VCN_AON_SOC_ADDRESS_2_0 0x1f800 -#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_VID_IP_ADDRESS_2_0 0x0 #define VCN_AON_IP_ADDRESS_2_0 0x30000 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 8e7a36f26e9c..b8d835c9e17e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x1fd #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x503 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index d716510b8dd6..3eec1b8feaee 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -39,6 +39,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 22ae1939476f..0b19f0ab4480 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -40,6 +40,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index c6f6392c1c20..1f777c125b00 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 #define VCN_HARVEST_MMSCH 0 @@ -614,7 +615,8 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 3e176b4b7c69..012f6ea928ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -45,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_AON_SOC_ADDRESS_3_0 0x48000 static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { SOC15_REG_ENTRY_STR(VCN, 0, regUVD_POWER_STATUS), diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index ba603b2246e2..a1171e6152ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -46,6 +46,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000) +#define VCN1_AON_SOC_ADDRESS_3_0 (0x48000 + 0x38000) #define VCN_HARVEST_MMSCH 0 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index d99d05f42f1d..b90da3d3e140 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -533,7 +533,8 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, /* VCN global tiling registers */ WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); return; } From d0ce1aaa8531a4a4707711cab5721374751c51b0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 1 May 2025 13:00:16 -0400 Subject: [PATCH 20/32] Revert "drm/amd: Stop evicting resources on APUs in suspend" This reverts commit 3a9626c816db901def438dc2513622e281186d39. This breaks S4 because we end up setting the s3/s0ix flags even when we are entering s4 since prepare is used by both flows. The causes both the S3/s0ix and s4 flags to be set which breaks several checks in the driver which assume they are mutually exclusive. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3634 Cc: Mario Limonciello Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit ce8f7d95899c2869b47ea6ce0b3e5bf304b2fff4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 18 ------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 ++--------- 3 files changed, 2 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index ef6e78224fdf..c3641331d4de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1614,11 +1614,9 @@ static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_cap #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); -void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } -static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif void amdgpu_register_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b7f8f2ff143d..707e131f89d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1533,22 +1533,4 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) #endif /* CONFIG_AMD_PMC */ } -/** - * amdgpu_choose_low_power_state - * - * @adev: amdgpu_device_pointer - * - * Choose the target low power state for the GPU - */ -void amdgpu_choose_low_power_state(struct amdgpu_device *adev) -{ - if (adev->in_runpm) - return; - - if (amdgpu_acpi_is_s0ix_active(adev)) - adev->in_s0ix = true; - else if (amdgpu_acpi_is_s3_active(adev)) - adev->in_s3 = true; -} - #endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7f354cd532dc..5ac7bd5942d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4949,15 +4949,13 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; - amdgpu_choose_low_power_state(adev); - if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - goto unprepare; + return r; flush_delayed_work(&adev->gfx.gfx_off_delay_work); @@ -4968,15 +4966,10 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]); if (r) - goto unprepare; + return r; } return 0; - -unprepare: - adev->in_s0ix = adev->in_s3 = adev->in_s4 = false; - - return r; } /** From 4aaffc85751da5722e858e4333e8cf0aa4b6c78f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 1 May 2025 13:46:46 -0400 Subject: [PATCH 21/32] drm/amdgpu: fix pm notifier handling Set the s3/s0ix and s4 flags in the pm notifier so that we can skip the resource evictions properly in pm prepare based on whether we are suspending or hibernating. Drop the eviction as processes are not frozen at this time, we we can end up getting stuck trying to evict VRAM while applications continue to submit work which causes the buffers to get pulled back into VRAM. v2: Move suspend flags out of pm notifier (Mario) Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4178 Fixes: 2965e6355dcd ("drm/amd: Add Suspend/Hibernate notification callback support") Cc: Mario Limonciello Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 06f2dcc241e7e5c681f81fbc46cacdf4bfd7d6d7) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 +++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +--------- 2 files changed, 6 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5ac7bd5942d0..f8b3e04d71ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4907,28 +4907,20 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * @data: data * * This function is called when the system is about to suspend or hibernate. - * It is used to evict resources from the device before the system goes to - * sleep while there is still access to swap. + * It is used to set the appropriate flags so that eviction can be optimized + * in the pm prepare callback. */ static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, void *data) { struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); - int r; switch (mode) { case PM_HIBERNATION_PREPARE: adev->in_s4 = true; - fallthrough; - case PM_SUSPEND_PREPARE: - r = amdgpu_device_evict_resources(adev); - /* - * This is considered non-fatal at this time because - * amdgpu_device_prepare() will also fatally evict resources. - * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781 - */ - if (r) - drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r); + break; + case PM_POST_HIBERNATION: + adev->in_s4 = false; break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 24ee4710f807..72c807f5822e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2615,13 +2615,8 @@ static int amdgpu_pmops_freeze(struct device *dev) static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; - - r = amdgpu_device_resume(drm_dev, true); - adev->in_s4 = false; - return r; + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_poweroff(struct device *dev) @@ -2634,9 +2629,6 @@ static int amdgpu_pmops_poweroff(struct device *dev) static int amdgpu_pmops_restore(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(drm_dev); - - adev->in_s4 = false; return amdgpu_device_resume(drm_dev, true); } From f690e3974755a650259a45d71456decc9c96a282 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:45:04 -0400 Subject: [PATCH 22/32] drm/amdgpu/hdp4: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: c9b8dcabb52a ("drm/amdgpu/hdp4.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 5c937b4a6050316af37ef214825b6340b5e9e391) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index f1dc13b3ab38..cbbeadeb53f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -41,7 +41,12 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 0e33e0f339b91eecd9558311449a3d1e728722d4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:46:56 -0400 Subject: [PATCH 23/32] drm/amdgpu/hdp5: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: cf424020e040 ("drm/amdgpu/hdp5.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit a5cb344033c7598762e89255e8ff52827abb57a4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 43195c079748..086a647308df 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -32,7 +32,12 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From dbc988c689333faeeed44d5561f372ff20395304 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:47:37 -0400 Subject: [PATCH 24/32] drm/amdgpu/hdp5.2: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: f756dbac1ce1 ("drm/amdgpu/hdp5.2: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 4a89b7698e771914b4d5b571600c76e2fdcbe2a9) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index fcb8dd2876bc..40940b4ab400 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -33,7 +33,17 @@ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev, if (!ring || !ring->funcs->emit_wreg) { WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + if (amdgpu_sriov_vf(adev)) { + /* this is fine because SR_IOV doesn't remap the register */ + RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + } else { + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); + } } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, From ca28e80abe4219c8f1a2961ae05102d70af6dc87 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:48:51 -0400 Subject: [PATCH 25/32] drm/amdgpu/hdp6: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: abe1cbaec6cf ("drm/amdgpu/hdp6.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 84141ff615951359c9a99696fd79a36c465ed847) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index a88d25a06c29..6ccd31c8bc69 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -35,7 +35,12 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 5a11a2767731139bf87e667331aa2209e33a1d19 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Apr 2025 12:50:02 -0400 Subject: [PATCH 26/32] drm/amdgpu/hdp7: use memcfg register to post the write for HDP flush Reading back the remapped HDP flush register seems to cause problems on some platforms. All we need is a read, so read back the memcfg register. Fixes: 689275140cb8 ("drm/amdgpu/hdp7.0: do a posting read when flushing HDP") Reported-by: Alexey Klimov Link: https://lists.freedesktop.org/archives/amd-gfx/2025-April/123150.html Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4119 Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3908 Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit dbc064adfcf9095e7d895bea87b2f75c1ab23236) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 49f7eb4fbd11..2c9239a22f39 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -32,7 +32,12 @@ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, { if (!ring || !ring->funcs->emit_wreg) { WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2); + /* We just need to read back a register to post the write. + * Reading back the remapped register causes problems on + * some platforms so just read back the memory size register. + */ + if (adev->nbio.funcs->get_memsize) + adev->nbio.funcs->get_memsize(adev); } else { amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); } From 391008f34e711253c5983b0bf52277cc43723127 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 8 Apr 2025 08:59:15 -0700 Subject: [PATCH 27/32] drm/xe: Add page queue multiplier For an unknown reason the math to determine the PF queue size does is not correct - compute UMD applications are overflowing the PF queue which is fatal. A multippier of 8 fixes the problem. Fixes: 3338e4f90c14 ("drm/xe: Use topology to determine page fault queue size") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Jagmeet Randhawa Link: https://lore.kernel.org/r/20250408155915.78770-1-matthew.brost@intel.com (cherry picked from commit 29582e0ea75c95668d168b12406e3c56cf5a73c4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index c5ad9a0a89c2..0c22b3a36655 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -435,9 +435,16 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, XE_MAX_EU_FUSE_BITS) * num_dss; - /* user can issue separate page faults per EU and per CS */ + /* + * user can issue separate page faults per EU and per CS + * + * XXX: Multiplier required as compute UMD are getting PF queue errors + * without it. Follow on why this multiplier is required. + */ +#define PF_MULTIPLIER 8 pf_queue->num_dw = - (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW; + (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; +#undef PF_MULTIPLIER pf_queue->gt = gt; pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, From 51c0ee84e4dc339287b2d7335f2b54d747794c83 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Mon, 28 Apr 2025 13:53:57 +0530 Subject: [PATCH 28/32] drm/xe/tests/mocs: Hold XE_FORCEWAKE_ALL for LNCF regs LNCF registers report wrong values when XE_FORCEWAKE_GT only is held. Holding XE_FORCEWAKE_ALL ensures correct operations on LNCF regs. V2(Himal): - Use xe_force_wake_ref_has_domain Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/1999 Fixes: a6a4ea6d7d37 ("drm/xe: Add mocs kunit") Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20250428082357.1730068-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay (cherry picked from commit 70a2585e582058e94fe4381a337be42dec800337) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/tests/xe_mocs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c index ef1e5256c56a..0e502feaca81 100644 --- a/drivers/gpu/drm/xe/tests/xe_mocs.c +++ b/drivers/gpu/drm/xe/tests/xe_mocs.c @@ -46,8 +46,11 @@ static void read_l3cc_table(struct xe_gt *gt, unsigned int fw_ref, i; u32 reg_val; - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - KUNIT_ASSERT_NE_MSG(test, fw_ref, 0, "Forcewake Failed.\n"); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + xe_force_wake_put(gt_to_fw(gt), fw_ref); + KUNIT_ASSERT_TRUE_MSG(test, true, "Forcewake Failed.\n"); + } for (i = 0; i < info->num_mocs_regs; i++) { if (!(i & 1)) { From 03552d8ac0afcc080c339faa0b726e2c0e9361cb Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 2 May 2025 08:51:04 -0700 Subject: [PATCH 29/32] drm/xe/gsc: do not flush the GSC worker from the reset path The workqueue used for the reset worker is marked as WQ_MEM_RECLAIM, while the GSC one isn't (and can't be as we need to do memory allocations in the gsc worker). Therefore, we can't flush the latter from the former. The reason why we had such a flush was to avoid interrupting either the GSC FW load or in progress GSC proxy operations. GSC proxy operations fall into 2 categories: 1) GSC proxy init: this only happens once immediately after GSC FW load and does not support being interrupted. The only way to recover from an interruption of the proxy init is to do an FLR and re-load the GSC. 2) GSC proxy request: this can happen in response to a request that the driver sends to the GSC. If this is interrupted, the GSC FW will timeout and the driver request will be failed, but overall the GSC will keep working fine. Flushing the work allowed us to avoid interruption in both cases (unless the hang came from the GSC engine itself, in which case we're toast anyway). However, a failure on a proxy request is tolerable if we're in a scenario where we're triggering a GT reset (i.e., something is already gone pretty wrong), so what we really need to avoid is interrupting the init flow, which we can do by polling on the register that reports when the proxy init is complete (as that ensure us that all the load and init operations have been completed). Note that during suspend we still want to do a flush of the worker to make sure it completes any operations involving the HW before the power is cut. v2: fix spelling in commit msg, rename waiter function (Julia) Fixes: dd0e89e5edc2 ("drm/xe/gsc: GSC FW load") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4830 Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Alan Previn Cc: # v6.8+ Reviewed-by: Julia Filipchuk Link: https://lore.kernel.org/r/20250502155104.2201469-1-daniele.ceraolospurio@intel.com (cherry picked from commit 12370bfcc4f0bdf70279ec5b570eb298963422b5) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gsc.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_gsc.h | 1 + drivers/gpu/drm/xe/xe_gsc_proxy.c | 11 +++++++++++ drivers/gpu/drm/xe/xe_gsc_proxy.h | 1 + drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_uc.c | 8 +++++++- drivers/gpu/drm/xe/xe_uc.h | 1 + 7 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index fd41113f8572..0bcf97063ff6 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -555,6 +555,28 @@ void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc) flush_work(&gsc->work); } +void xe_gsc_stop_prepare(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + int ret; + + if (!xe_uc_fw_is_loadable(&gsc->fw) || xe_uc_fw_is_in_error_state(&gsc->fw)) + return; + + xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GSC); + + /* + * If the GSC FW load or the proxy init are interrupted, the only way + * to recover it is to do an FLR and reload the GSC from scratch. + * Therefore, let's wait for the init to complete before stopping + * operations. The proxy init is the last step, so we can just wait on + * that + */ + ret = xe_gsc_wait_for_proxy_init_done(gsc); + if (ret) + xe_gt_err(gt, "failed to wait for GSC init completion before uc stop\n"); +} + /* * wa_14015076503: if the GSC FW is loaded, we need to alert it before doing a * GSC engine reset by writing a notification bit in the GS1 register and then diff --git a/drivers/gpu/drm/xe/xe_gsc.h b/drivers/gpu/drm/xe/xe_gsc.h index d99f66c38075..b8b8e0810ad9 100644 --- a/drivers/gpu/drm/xe/xe_gsc.h +++ b/drivers/gpu/drm/xe/xe_gsc.h @@ -16,6 +16,7 @@ struct xe_hw_engine; int xe_gsc_init(struct xe_gsc *gsc); int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc); void xe_gsc_wait_for_worker_completion(struct xe_gsc *gsc); +void xe_gsc_stop_prepare(struct xe_gsc *gsc); void xe_gsc_load_start(struct xe_gsc *gsc); void xe_gsc_hwe_irq_handler(struct xe_hw_engine *hwe, u16 intr_vec); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index 8cf70b228ff3..d0519cd6704a 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -71,6 +71,17 @@ bool xe_gsc_proxy_init_done(struct xe_gsc *gsc) HECI1_FWSTS1_PROXY_STATE_NORMAL; } +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc) +{ + struct xe_gt *gt = gsc_to_gt(gsc); + + /* Proxy init can take up to 500ms, so wait double that for safety */ + return xe_mmio_wait32(>->mmio, HECI_FWSTS1(MTL_GSC_HECI1_BASE), + HECI1_FWSTS1_CURRENT_STATE, + HECI1_FWSTS1_PROXY_STATE_NORMAL, + USEC_PER_SEC, NULL, false); +} + static void __gsc_proxy_irq_rmw(struct xe_gsc *gsc, u32 clr, u32 set) { struct xe_gt *gt = gsc_to_gt(gsc); diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.h b/drivers/gpu/drm/xe/xe_gsc_proxy.h index fdef56995cd4..765602221dbc 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.h +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.h @@ -12,6 +12,7 @@ struct xe_gsc; int xe_gsc_proxy_init(struct xe_gsc *gsc); bool xe_gsc_proxy_init_done(struct xe_gsc *gsc); +int xe_gsc_wait_for_proxy_init_done(struct xe_gsc *gsc); int xe_gsc_proxy_start(struct xe_gsc *gsc); int xe_gsc_proxy_request_handler(struct xe_gsc *gsc); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 10a9e3c72b36..66198cf2662c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -857,7 +857,7 @@ void xe_gt_suspend_prepare(struct xe_gt *gt) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_stop_prepare(>->uc); + xe_uc_suspend_prepare(>->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); } diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index c14bd2282044..3a8751a8b92d 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -244,7 +244,7 @@ void xe_uc_gucrc_disable(struct xe_uc *uc) void xe_uc_stop_prepare(struct xe_uc *uc) { - xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_gsc_stop_prepare(&uc->gsc); xe_guc_stop_prepare(&uc->guc); } @@ -278,6 +278,12 @@ static void uc_reset_wait(struct xe_uc *uc) goto again; } +void xe_uc_suspend_prepare(struct xe_uc *uc) +{ + xe_gsc_wait_for_worker_completion(&uc->gsc); + xe_guc_stop_prepare(&uc->guc); +} + int xe_uc_suspend(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 3813c1ede450..c23e6f5e2514 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -18,6 +18,7 @@ int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); void xe_uc_stop(struct xe_uc *uc); int xe_uc_start(struct xe_uc *uc); +void xe_uc_suspend_prepare(struct xe_uc *uc); int xe_uc_suspend(struct xe_uc *uc); int xe_uc_sanitize_reset(struct xe_uc *uc); void xe_uc_declare_wedged(struct xe_uc *uc); From 9d271a4f5ba52520e448ab223b1a91c6e35f17c7 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Wed, 7 May 2025 02:23:02 +0000 Subject: [PATCH 30/32] drm/xe: Release force wake first then runtime power xe_force_wake_get() is dependent on xe_pm_runtime_get(), so for the release path, xe_force_wake_put() should be called first then xe_pm_runtime_put(). Combine the error path and normal path together with goto. Fixes: 85d547608ef5 ("drm/xe/xe_gt_debugfs: Update handling of xe_force_wake_get return") Cc: Himal Prasad Ghimiray Cc: Rodrigo Vivi Signed-off-by: Shuicheng Lin Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20250507022302.2187527-1-shuicheng.lin@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 432cd94efdca06296cc5e76d673546f58aa90ee1) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_debugfs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index 2d63a69cbfa3..f7005a3643e6 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -92,22 +92,23 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p) struct xe_hw_engine *hwe; enum xe_hw_engine_id id; unsigned int fw_ref; + int ret = 0; xe_pm_runtime_get(xe); fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { - xe_pm_runtime_put(xe); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto fw_put; } for_each_hw_engine(hwe, gt, id) xe_hw_engine_print(hwe, p); +fw_put: xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(xe); - return 0; + return ret; } static int powergate_info(struct xe_gt *gt, struct drm_printer *p) From 564467e9d06c6352fac9100e8957d40a1a50234c Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Fri, 2 May 2025 17:00:52 +0000 Subject: [PATCH 31/32] drm/xe: Add config control for svm flush work Without CONFIG_DRM_XE_GPUSVM set, GPU SVM is not initialized thus below warning pops. Refine the flush work code to be controlled by the config to avoid below warning: " [ 453.132028] ------------[ cut here ]------------ [ 453.132527] WARNING: CPU: 9 PID: 4491 at kernel/workqueue.c:4205 __flush_work+0x379/0x3a0 [ 453.133355] Modules linked in: xe drm_ttm_helper ttm gpu_sched drm_buddy drm_suballoc_helper drm_gpuvm drm_exec [ 453.134352] CPU: 9 UID: 0 PID: 4491 Comm: xe_exec_mix_mod Tainted: G U W 6.15.0-rc3+ #7 PREEMPT(full) [ 453.135405] Tainted: [U]=USER, [W]=WARN ... [ 453.136921] RIP: 0010:__flush_work+0x379/0x3a0 [ 453.137417] Code: 8b 45 00 48 8b 55 08 89 c7 48 c1 e8 04 83 e7 08 83 e0 0f 83 cf 02 89 c6 48 0f ba 6d 00 03 e9 d5 fe ff ff 0f 0b e9 db fd ff ff <0f> 0b 45 31 e4 e9 d1 fd ff ff 0f 0b e9 03 ff ff ff 0f 0b e9 d6 fe [ 453.139250] RSP: 0018:ffffc90000c67b18 EFLAGS: 00010246 [ 453.139782] RAX: 0000000000000000 RBX: ffff888108a24000 RCX: 0000000000002000 [ 453.140521] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8881016d61c8 [ 453.141253] RBP: ffff8881016d61c8 R08: 0000000000000000 R09: 0000000000000000 [ 453.141985] R10: 0000000000000000 R11: 0000000008a24000 R12: 0000000000000001 [ 453.142709] R13: 0000000000000002 R14: 0000000000000000 R15: ffff888107db8c00 [ 453.143450] FS: 00007f44853d4c80(0000) GS:ffff8882f469b000(0000) knlGS:0000000000000000 [ 453.144276] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 453.144853] CR2: 00007f4487629228 CR3: 00000001016aa000 CR4: 00000000000406f0 [ 453.145594] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 453.146320] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 453.147061] Call Trace: [ 453.147336] [ 453.147579] ? tick_nohz_tick_stopped+0xd/0x30 [ 453.148067] ? xas_load+0x9/0xb0 [ 453.148435] ? xa_load+0x6f/0xb0 [ 453.148781] __xe_vm_bind_ioctl+0xbd5/0x1500 [xe] [ 453.149338] ? dev_printk_emit+0x48/0x70 [ 453.149762] ? _dev_printk+0x57/0x80 [ 453.150148] ? drm_ioctl+0x17c/0x440 [ 453.150544] ? __drm_dev_vprintk+0x36/0x90 [ 453.150983] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 453.151575] ? drm_ioctl_kernel+0x9f/0xf0 [ 453.151998] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 453.152560] drm_ioctl_kernel+0x9f/0xf0 [ 453.152968] drm_ioctl+0x20f/0x440 [ 453.153332] ? __pfx_xe_vm_bind_ioctl+0x10/0x10 [xe] [ 453.153893] ? ioctl_has_perm.constprop.0.isra.0+0xae/0x100 [ 453.154489] ? memory_bm_test_bit+0x5/0x60 [ 453.154935] xe_drm_ioctl+0x47/0x70 [xe] [ 453.155419] __x64_sys_ioctl+0x8d/0xc0 [ 453.155824] do_syscall_64+0x47/0x110 [ 453.156228] entry_SYSCALL_64_after_hwframe+0x76/0x7e " v2 (Matt): refine commit message to have more details add Fixes tag move the code to xe_svm.h which already have the config remove a blank line per codestyle suggestion Fixes: 63f6e480d115 ("drm/xe: Add SVM garbage collector") Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250502170052.1787973-1-shuicheng.lin@intel.com (cherry picked from commit 9d80698bcd97a5ad1088bcbb055e73fd068895e2) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_svm.h | 8 ++++++++ drivers/gpu/drm/xe/xe_vm.c | 3 +-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 0b6547c06961..24c578e1170e 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -947,3 +947,15 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #endif + +/** + * xe_svm_flush() - SVM flush + * @vm: The VM. + * + * Flush all SVM actions. + */ +void xe_svm_flush(struct xe_vm *vm) +{ + if (xe_vm_in_fault_mode(vm)) + flush_work(&vm->svm.garbage_collector.work); +} diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index e059590e5076..be306fe7aaa4 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -72,6 +72,9 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end); int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); + +void xe_svm_flush(struct xe_vm *vm); + #else static inline bool xe_svm_range_pages_valid(struct xe_svm_range *range) { @@ -124,6 +127,11 @@ static inline void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } + +static inline void xe_svm_flush(struct xe_vm *vm) +{ +} + #endif /** diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 60303998bd61..367c84b90e9e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3312,8 +3312,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) } /* Ensure all UNMAPs visible */ - if (xe_vm_in_fault_mode(vm)) - flush_work(&vm->svm.garbage_collector.work); + xe_svm_flush(vm); err = down_write_killable(&vm->lock); if (err) From 732b87a409667a370b87955c518e5d004de740b5 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 7 May 2025 18:19:53 +0300 Subject: [PATCH 32/32] drm/i915/dp: Fix determining SST/MST mode during MTP TU state computation Determining the SST/MST mode during state computation must be done based on the output type stored in the CRTC state, which in turn is set once based on the modeset connector's SST vs. MST type and will not change as long as the connector is using the CRTC. OTOH the MST mode indicated by the given connector's intel_dp::is_mst flag can change independently of the above output type, based on what sink is at any moment plugged to the connector. Fix the state computation accordingly. Cc: Jani Nikula Fixes: f6971d7427c2 ("drm/i915/mst: adapt intel_dp_mtp_tu_compute_config() for 128b/132b SST") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4607 Reviewed-by: Jani Nikula Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20250507151953.251846-1-imre.deak@intel.com (cherry picked from commit 0f45696ddb2b901fbf15cb8d2e89767be481d59f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 02f95108c637..6dc2d31ccb5a 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -242,7 +242,7 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, to_intel_connector(conn_state->connector); const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; - bool is_mst = intel_dp->is_mst; + bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); int bpp_x16, slots = -EINVAL; int dsc_slice_count = 0; int max_dpt_bpp_x16;