Skip to content

Commit

Permalink
accel/ivpu: Fix locking order in ivpu_job_submit
Browse files Browse the repository at this point in the history
commit ab680dc upstream.

Fix deadlock in job submission and abort handling.
When a thread aborts currently executing jobs due to a fault,
it first locks the global lock protecting submitted_jobs (#1).

After the last job is destroyed, it proceeds to release the related context
and locks file_priv (#2). Meanwhile, in the job submission thread,
the file_priv lock (#2) is taken first, and then the submitted_jobs
lock (#1) is obtained when a job is added to the submitted jobs list.

       CPU0                            CPU1
       ----                    	       ----
  (for example due to a fault)         (jobs submissions keep coming)

  lock(&vdev->submitted_jobs_lock) #1
  ivpu_jobs_abort_all()
  job_destroy()
                                      lock(&file_priv->lock)           #2
                                      lock(&vdev->submitted_jobs_lock) #1
  file_priv_release()
  lock(&vdev->context_list_lock)
  lock(&file_priv->lock)           #2

This order of locking causes a deadlock. To resolve this issue,
change the order of locking in ivpu_job_submit().

Signed-off-by: Karol Wachowski <karol.wachowski@intel.com>
Signed-off-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250107173238.381120-12-maciej.falkowski@linux.intel.com
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
[ This backport required small adjustments to ivpu_job_submit(),
  which lacks support for explicit command queue creation added in 6.15.  ]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  • Loading branch information
Karol Wachowski authored and Greg Kroah-Hartman committed May 9, 2025
1 parent 437b1eb commit b9b7092
Showing 1 changed file with 6 additions and 9 deletions.
15 changes: 6 additions & 9 deletions drivers/accel/ivpu/ivpu_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -532,26 +532,25 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
if (ret < 0)
return ret;

mutex_lock(&vdev->submitted_jobs_lock);
mutex_lock(&file_priv->lock);

cmdq = ivpu_cmdq_acquire(file_priv, priority);
if (!cmdq) {
ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d engine %d prio %d\n",
file_priv->ctx.id, job->engine_idx, priority);
ret = -EINVAL;
goto err_unlock_file_priv;
goto err_unlock;
}

mutex_lock(&vdev->submitted_jobs_lock);

is_first_job = xa_empty(&vdev->submitted_jobs_xa);
ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
&file_priv->job_id_next, GFP_KERNEL);
if (ret < 0) {
ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
file_priv->ctx.id);
ret = -EBUSY;
goto err_unlock_submitted_jobs;
goto err_unlock;
}

ret = ivpu_cmdq_push_job(cmdq, job);
Expand All @@ -574,22 +573,20 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
job->job_id, file_priv->ctx.id, job->engine_idx, priority,
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);

mutex_unlock(&vdev->submitted_jobs_lock);
mutex_unlock(&file_priv->lock);

if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
mutex_lock(&vdev->submitted_jobs_lock);
ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
mutex_unlock(&vdev->submitted_jobs_lock);
}

mutex_unlock(&vdev->submitted_jobs_lock);

return 0;

err_erase_xa:
xa_erase(&vdev->submitted_jobs_xa, job->job_id);
err_unlock_submitted_jobs:
err_unlock:
mutex_unlock(&vdev->submitted_jobs_lock);
err_unlock_file_priv:
mutex_unlock(&file_priv->lock);
ivpu_rpm_put(vdev);
return ret;
Expand Down

0 comments on commit b9b7092

Please sign in to comment.