Skip to content

Commit

Permalink
drm/amdkfd: Remove kfd_process_hw_exception worker
Browse files Browse the repository at this point in the history
With the GPU reset-domain worker implemented, the KFD hw_exception worker is
no longer needed; just call amdgpu_amdkfd_gpu_reset directly from
kfd_hws_hang.

Suggested-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Philip Yang authored and Alex Deucher committed Feb 27, 2025
1 parent f923421 commit ee3ed10
Showing 2 changed files with 1 addition and 11 deletions.
11 changes: 1 addition & 10 deletions drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
Original file line number Diff line number Diff line change
@@ -66,7 +66,6 @@ static inline void deallocate_hqd(struct device_queue_manager *dqm,
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
struct queue *q, const uint32_t *restore_sdma_id);
-static void kfd_process_hw_exception(struct work_struct *work);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
@@ -170,7 +169,7 @@ static void kfd_hws_hang(struct device_queue_manager *dqm)
/*
* Issue a GPU reset if HWS is unresponsive
*/
-schedule_work(&dqm->hw_exception_work);
+amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}

static int convert_to_mes_queue_type(int queue_type)
@@ -1740,7 +1739,6 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
dqm->active_cp_queue_count = 0;
dqm->gws_queue_count = 0;
dqm->active_runlist = false;
-INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
dqm->trap_debug_vmid = 0;

init_sdma_bitmaps(dqm);
@@ -3080,13 +3078,6 @@ int kfd_evict_process_device(struct kfd_process_device *pdd)
return ret;
}

-static void kfd_process_hw_exception(struct work_struct *work)
-{
-struct device_queue_manager *dqm = container_of(work,
-struct device_queue_manager, hw_exception_work);
-amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
-}
-
int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
1 change: 0 additions & 1 deletion drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
Original file line number Diff line number Diff line change
@@ -269,7 +269,6 @@ struct device_queue_manager {
/* hw exception */
bool is_hws_hang;
bool is_resetting;
-struct work_struct hw_exception_work;
struct kfd_mem_obj hiq_sdma_mqd;
bool sched_running;
bool sched_halt;

0 comments on commit ee3ed10

Please sign in to comment.