Skip to content

Commit

Permalink
drm/sched: implement dynamic job-flow control
Browse files Browse the repository at this point in the history
Currently, job flow control is implemented simply by limiting the number
of jobs in flight. Therefore, a scheduler is initialized with a credit
limit that corresponds to the number of jobs which can be sent to the
hardware.

This implies that for each job, drivers need to account for the maximum
job size possible in order to not overflow the ring buffer.

However, there are drivers, such as Nouveau, where the job size has a
rather large range. For such drivers it can easily happen that job
submissions filling not even 1% of the ring block subsequent
submissions, which, in the worst case, can lead to the ring running dry.

In order to overcome this issue, allow for tracking the actual job size
instead of the number of jobs. To this end, add a field to track a job's
credit count, which represents the number of credits a job contributes
to the scheduler's credit limit.

Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Reviewed-by: Luben Tuikov <ltuikov89@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231110001638.71750-1-dakr@redhat.com
  • Loading branch information
Danilo Krummrich committed Nov 10, 2023
1 parent 36245bd commit a78422e
Show file tree
Hide file tree
Showing 14 changed files with 175 additions and 51 deletions.
6 changes: 6 additions & 0 deletions Documentation/gpu/drm-mm.rst
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,12 @@ Overview
.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:doc: Overview

Flow Control
------------

.. kernel-doc:: drivers/gpu/drm/scheduler/sched_main.c
:doc: Flow Control

Scheduler Function References
-----------------------------

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (!entity)
return 0;

return drm_sched_job_init(&(*job)->base, entity, owner);
return drm_sched_job_init(&(*job)->base, entity, 1, owner);
}

int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,

ret = drm_sched_job_init(&submit->sched_job,
&ctx->sched_entity[args->pipe],
submit->ctx);
1, submit->ctx);
if (ret)
goto err_submit_put;

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/etnaviv/etnaviv_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1917,7 +1917,7 @@ static int etnaviv_gpu_rpm_suspend(struct device *dev)
u32 idle, mask;

/* If there are any jobs in the HW queue, we're not idle */
if (atomic_read(&gpu->sched.hw_rq_count))
if (atomic_read(&gpu->sched.credit_count))
return -EBUSY;

/* Check whether the hardware (except FE and MC) is idle */
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/lima/lima_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ int lima_device_suspend(struct device *dev)

/* check any task running */
for (i = 0; i < lima_pipe_num; i++) {
if (atomic_read(&ldev->pipe[i].base.hw_rq_count))
if (atomic_read(&ldev->pipe[i].base.credit_count))
return -EBUSY;
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/lima/lima_sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ int lima_sched_task_init(struct lima_sched_task *task,
for (i = 0; i < num_bos; i++)
drm_gem_object_get(&bos[i]->base.base);

err = drm_sched_job_init(&task->base, &context->base, vm);
err = drm_sched_job_init(&task->base, &context->base, 1, vm);
if (err) {
kfree(task->bos);
return err;
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/msm/msm_gem_submit.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
return ERR_PTR(ret);
}

ret = drm_sched_job_init(&submit->base, queue->entity, queue);
ret = drm_sched_job_init(&submit->base, queue->entity, 1, queue);
if (ret) {
kfree(submit->hw_fence);
kfree(submit);
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/nouveau/nouveau_sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ nouveau_job_init(struct nouveau_job *job,

}

ret = drm_sched_job_init(&job->base, &entity->base, NULL);
ret = drm_sched_job_init(&job->base, &entity->base, 1, NULL);
if (ret)
goto err_free_chains;

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/panfrost/panfrost_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,

ret = drm_sched_job_init(&job->base,
&file_priv->sched_entity[slot],
NULL);
1, NULL);
if (ret)
goto out_put_job;

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/panfrost/panfrost_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -963,7 +963,7 @@ int panfrost_job_is_idle(struct panfrost_device *pfdev)

for (i = 0; i < NUM_JOB_SLOTS; i++) {
/* If there are any jobs in the HW queue, we're not idle */
if (atomic_read(&js->queue[i].sched.hw_rq_count))
if (atomic_read(&js->queue[i].sched.credit_count))
return false;
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ DECLARE_EVENT_CLASS(drm_sched_job,
__assign_str(name, sched_job->sched->name);
__entry->job_count = spsc_queue_count(&entity->job_queue);
__entry->hw_job_count = atomic_read(
&sched_job->sched->hw_rq_count);
&sched_job->sched->credit_count);
),
TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
__entry->entity, __entry->id,
Expand Down
Loading

0 comments on commit a78422e

Please sign in to comment.