From 19cff16559a4f2d763faf4f8392bf86d3a21b93c Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Fri, 2 Aug 2024 14:22:26 -0400 Subject: [PATCH] drm/amdgpu: abort KIQ waits when there is a pending reset Stop waiting for the KIQ to return back when there is a reset pending. It's quite likely that the KIQ will never response. Signed-off-by: Koenig Christian Suggested-by: Lazar Lijo Tested-by: Victor Skvortsov Signed-off-by: Victor Skvortsov Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index b49b3650fd621..17a19d49d30a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -786,7 +786,8 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, goto failed_kiq; might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY && + !amdgpu_reset_pending(adev->reset_domain)) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 4ae581f3fcb54..1cb920abc2fe9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -136,6 +136,12 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma return queue_work(domain->wq, work); } +static inline bool amdgpu_reset_pending(struct amdgpu_reset_domain *domain) +{ + lockdep_assert_held(&domain->sem); + return rwsem_is_contended(&domain->sem); +} + void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);