Skip to content

Commit

Permalink
drm/scheduler: Add drm_sched_suspend/resume_timeout()
Browse files Browse the repository at this point in the history
This patch adds two new functions to help client drivers suspend and
resume the scheduler job timeout. This can be useful in cases where the
hardware has preemption support enabled. Using this, it is possible to have
the timeout active only for the ring which is active on the ringbuffer.
This patch also makes the job_list_lock IRQ safe.

Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Sharat Masetty authored and Alex Deucher committed Dec 5, 2018
1 parent 9afd075 commit 1db8c14
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 16 deletions.
9 changes: 5 additions & 4 deletions drivers/gpu/drm/etnaviv/etnaviv_dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
unsigned int n_obj, n_bomap_pages;
size_t file_size, mmu_size;
__le64 *bomap, *bomap_start;
unsigned long flags;

/* Only catch the first event, or when manually re-armed */
if (!etnaviv_dump_core)
Expand All @@ -134,13 +135,13 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
mmu_size + gpu->buffer.size;

/* Add in the active command buffers */
spin_lock(&gpu->sched.job_list_lock);
spin_lock_irqsave(&sched->job_list_lock, flags);
list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
submit = to_etnaviv_submit(s_job);
file_size += submit->cmdbuf.size;
n_obj++;
}
spin_unlock(&gpu->sched.job_list_lock);
spin_unlock_irqrestore(&sched->job_list_lock, flags);

/* Add in the active buffer objects */
list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
Expand Down Expand Up @@ -182,14 +183,14 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
gpu->buffer.size,
etnaviv_cmdbuf_get_va(&gpu->buffer));

spin_lock(&gpu->sched.job_list_lock);
spin_lock_irqsave(&sched->job_list_lock, flags);
list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
submit = to_etnaviv_submit(s_job);
etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
submit->cmdbuf.vaddr, submit->cmdbuf.size,
etnaviv_cmdbuf_get_va(&submit->cmdbuf));
}
spin_unlock(&gpu->sched.job_list_lock);
spin_unlock_irqrestore(&sched->job_list_lock, flags);

/* Reserve space for the bomap */
if (n_bomap_pages) {
Expand Down
85 changes: 73 additions & 12 deletions drivers/gpu/drm/scheduler/sched_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,13 +211,70 @@ void drm_sched_fault(struct drm_gpu_scheduler *sched)
}
EXPORT_SYMBOL(drm_sched_fault);

/**
* drm_sched_suspend_timeout - Suspend scheduler job timeout
*
* @sched: scheduler instance for which to suspend the timeout
*
* Suspend the delayed work timeout for the scheduler. This is done by
* modifying the delayed work timeout to an arbitrary large value,
* MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
* called from an IRQ context.
*
* Returns the timeout remaining
*
*/
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
unsigned long sched_timeout, now = jiffies;

sched_timeout = sched->work_tdr.timer.expires;

/*
* Modify the timeout to an arbitrarily large value. This also prevents
* the timeout to be restarted when new submissions arrive
*/
if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
&& time_after(sched_timeout, now))
return sched_timeout - now;
else
return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
* drm_sched_resume_timeout - Resume scheduler job timeout
*
* @sched: scheduler instance for which to resume the timeout
* @remaining: remaining timeout
*
* Resume the delayed work timeout for the scheduler. Note that
* this function can be called from an IRQ context.
*/
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
unsigned long remaining)
{
unsigned long flags;

spin_lock_irqsave(&sched->job_list_lock, flags);

if (list_empty(&sched->ring_mirror_list))
cancel_delayed_work(&sched->work_tdr);
else
mod_delayed_work(system_wq, &sched->work_tdr, remaining);

spin_unlock_irqrestore(&sched->job_list_lock, flags);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);

/* job_finish is called after hw fence signaled
*/
static void drm_sched_job_finish(struct work_struct *work)
{
struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
finish_work);
struct drm_gpu_scheduler *sched = s_job->sched;
unsigned long flags;

/*
* Canceling the timeout without removing our job from the ring mirror
Expand All @@ -228,12 +285,12 @@ static void drm_sched_job_finish(struct work_struct *work)
*/
cancel_delayed_work_sync(&sched->work_tdr);

spin_lock(&sched->job_list_lock);
spin_lock_irqsave(&sched->job_list_lock, flags);
/* remove job from ring_mirror_list */
list_del_init(&s_job->node);
/* queue TDR for next job */
drm_sched_start_timeout(sched);
spin_unlock(&sched->job_list_lock);
spin_unlock_irqrestore(&sched->job_list_lock, flags);

sched->ops->free_job(s_job);
}
Expand All @@ -249,20 +306,22 @@ static void drm_sched_job_finish_cb(struct dma_fence *f,
static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
struct drm_gpu_scheduler *sched = s_job->sched;
unsigned long flags;

dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
drm_sched_job_finish_cb);

spin_lock(&sched->job_list_lock);
spin_lock_irqsave(&sched->job_list_lock, flags);
list_add_tail(&s_job->node, &sched->ring_mirror_list);
drm_sched_start_timeout(sched);
spin_unlock(&sched->job_list_lock);
spin_unlock_irqrestore(&sched->job_list_lock, flags);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
struct drm_gpu_scheduler *sched;
struct drm_sched_job *job;
unsigned long flags;

sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
job = list_first_entry_or_null(&sched->ring_mirror_list,
Expand All @@ -271,9 +330,9 @@ static void drm_sched_job_timedout(struct work_struct *work)
if (job)
job->sched->ops->timedout_job(job);

spin_lock(&sched->job_list_lock);
spin_lock_irqsave(&sched->job_list_lock, flags);
drm_sched_start_timeout(sched);
spin_unlock(&sched->job_list_lock);
spin_unlock_irqrestore(&sched->job_list_lock, flags);
}

/**
Expand All @@ -287,9 +346,10 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo
{
struct drm_sched_job *s_job;
struct drm_sched_entity *entity, *tmp;
unsigned long flags;
int i;

spin_lock(&sched->job_list_lock);
spin_lock_irqsave(&sched->job_list_lock, flags);
list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
if (s_job->s_fence->parent &&
dma_fence_remove_callback(s_job->s_fence->parent,
Expand All @@ -299,7 +359,7 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo
atomic_dec(&sched->hw_rq_count);
}
}
spin_unlock(&sched->job_list_lock);
spin_unlock_irqrestore(&sched->job_list_lock, flags);

if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
atomic_inc(&bad->karma);
Expand Down Expand Up @@ -337,9 +397,10 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
{
struct drm_sched_job *s_job, *tmp;
bool found_guilty = false;
unsigned long flags;
int r;

spin_lock(&sched->job_list_lock);
spin_lock_irqsave(&sched->job_list_lock, flags);
list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
struct drm_sched_fence *s_fence = s_job->s_fence;
struct dma_fence *fence;
Expand All @@ -353,7 +414,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
dma_fence_set_error(&s_fence->finished, -ECANCELED);

spin_unlock(&sched->job_list_lock);
spin_unlock_irqrestore(&sched->job_list_lock, flags);
fence = sched->ops->run_job(s_job);
atomic_inc(&sched->hw_rq_count);

Expand All @@ -372,10 +433,10 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
drm_sched_expel_job_unlocked(s_job);
drm_sched_process_job(NULL, &s_fence->cb);
}
spin_lock(&sched->job_list_lock);
spin_lock_irqsave(&sched->job_list_lock, flags);
}
drm_sched_start_timeout(sched);
spin_unlock(&sched->job_list_lock);
spin_unlock_irqrestore(&sched->job_list_lock, flags);
}
EXPORT_SYMBOL(drm_sched_job_recovery);

Expand Down
4 changes: 4 additions & 0 deletions include/drm/gpu_scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -331,4 +331,8 @@ struct drm_sched_fence *drm_sched_fence_create(
void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
void drm_sched_fence_finished(struct drm_sched_fence *fence);

unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
unsigned long remaining);

#endif

0 comments on commit 1db8c14

Please sign in to comment.