drm/xe: Decouple job seqno and lrc seqno
Tightly coupling these seqnos presents problems if alternative fences for
jobs are used. Decouple them for correctness.

v2:
- Slightly reword commit message (Thomas)
- Make sure the lrc fence ops are used in comparison (Thomas)
- Assume seqno is unsigned rather than signed in format string (Thomas)

Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527135912.152156-2-thomas.hellstrom@linux.intel.com
Matthew Brost authored and Thomas Hellström committed May 27, 2024
1 parent d79e8ca commit 08f7200
Showing 7 changed files with 30 additions and 19 deletions.
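Reader's note (not part of the commit): after this change a job tracks two independent sequence numbers, exposed by xe_sched_job_seqno() and the new xe_sched_job_lrc_seqno(). The former orders the job's dma-fence within its fence context; the latter is the value the ring ops emit into the LRC and is what hardware-progress checks compare against. A minimal illustrative sketch, assuming only the helpers added in this series (the function below is hypothetical):

#include <drm/drm_print.h>

#include "xe_device_types.h"
#include "xe_sched_job.h"

/* Hypothetical helper, for illustration only. */
static void example_log_job_seqnos(struct xe_device *xe, struct xe_sched_job *job)
{
        /* Seqno of the job's dma-fence (ordering within its fence context). */
        u32 fence_seqno = xe_sched_job_seqno(job);
        /* Seqno the ring emission writes into the LRC (hardware progress). */
        u32 lrc_seqno = xe_sched_job_lrc_seqno(job);

        /* With alternative job fences these two values may differ. */
        drm_dbg(&xe->drm, "job fence seqno=%u, lrc_seqno=%u\n", fence_seqno, lrc_seqno);
}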
2 changes: 1 addition & 1 deletion drivers/gpu/drm/xe/xe_exec_queue.c
@@ -98,7 +98,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
 
         if (xe_exec_queue_is_parallel(q)) {
                 q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
-                q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
+                q->parallel.composite_fence_seqno = 0;
         }
 
         return q;
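Side note (an observation, not part of the commit message): with the job fence decoupled from the LRC seqno, the composite fence for parallel jobs can simply count from 0 in its own context rather than reusing XE_FENCE_INITIAL_SEQNO. A simplified sketch of how the composite fence is built in xe_sched_job_create(), assuming the upstream dma_fence_array_create() signature; error handling and the seqno rollback on failure are omitted:

#include <linux/dma-fence-array.h>

#include "xe_exec_queue_types.h"

/* Hypothetical wrapper, for illustration only. */
static struct dma_fence *example_make_composite_fence(struct xe_exec_queue *q,
                                                      struct dma_fence **fences)
{
        struct dma_fence_array *cf;

        /* One composite fence per parallel job, numbered 0, 1, 2, ... */
        cf = dma_fence_array_create(q->width, fences,
                                    q->parallel.composite_fence_ctx,
                                    q->parallel.composite_fence_seqno++,
                                    false);
        return cf ? &cf->base : NULL;
}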
5 changes: 3 additions & 2 deletions drivers/gpu/drm/xe/xe_guc_submit.c
@@ -940,8 +940,9 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
                 return DRM_GPU_SCHED_STAT_NOMINAL;
         }
 
-        drm_notice(&xe->drm, "Timedout job: seqno=%u, guc_id=%d, flags=0x%lx",
-                   xe_sched_job_seqno(job), q->guc->id, q->flags);
+        drm_notice(&xe->drm, "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
+                   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
+                   q->guc->id, q->flags);
         xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
                    "Kernel-submitted job timed out\n");
         xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
12 changes: 6 additions & 6 deletions drivers/gpu/drm/xe/xe_ring_ops.c
@@ -398,7 +398,7 @@ static void emit_job_gen12_gsc(struct xe_sched_job *job)
 
         __emit_job_gen12_simple(job, job->q->lrc,
                                 job->batch_addr[0],
-                                xe_sched_job_seqno(job));
+                                xe_sched_job_lrc_seqno(job));
 }
 
 static void emit_job_gen12_copy(struct xe_sched_job *job)
@@ -407,14 +407,14 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
 
         if (xe_sched_job_is_migration(job->q)) {
                 emit_migration_job_gen12(job, job->q->lrc,
-                                         xe_sched_job_seqno(job));
+                                         xe_sched_job_lrc_seqno(job));
                 return;
         }
 
         for (i = 0; i < job->q->width; ++i)
                 __emit_job_gen12_simple(job, job->q->lrc + i,
-                                        job->batch_addr[i],
-                                        xe_sched_job_seqno(job));
+                                        job->batch_addr[i],
+                                        xe_sched_job_lrc_seqno(job));
 }
 
 static void emit_job_gen12_video(struct xe_sched_job *job)
@@ -425,7 +425,7 @@ static void emit_job_gen12_video(struct xe_sched_job *job)
         for (i = 0; i < job->q->width; ++i)
                 __emit_job_gen12_video(job, job->q->lrc + i,
                                        job->batch_addr[i],
-                                       xe_sched_job_seqno(job));
+                                       xe_sched_job_lrc_seqno(job));
 }
 
 static void emit_job_gen12_render_compute(struct xe_sched_job *job)
@@ -435,7 +435,7 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job)
         for (i = 0; i < job->q->width; ++i)
                 __emit_job_gen12_render_compute(job, job->q->lrc + i,
                                                 job->batch_addr[i],
-                                                xe_sched_job_seqno(job));
+                                                xe_sched_job_lrc_seqno(job));
 }
 
 static const struct xe_ring_ops ring_ops_gen12_gsc = {
16 changes: 8 additions & 8 deletions drivers/gpu/drm/xe/xe_sched_job.c
@@ -117,6 +117,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
                         err = PTR_ERR(job->fence);
                         goto err_sched_job;
                 }
+                job->lrc_seqno = job->fence->seqno;
         } else {
                 struct dma_fence_array *cf;
 
@@ -132,6 +133,8 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
                                 err = PTR_ERR(fences[j]);
                                 goto err_fences;
                         }
+                        if (!j)
+                                job->lrc_seqno = fences[0]->seqno;
                 }
 
                 cf = dma_fence_array_create(q->width, fences,
@@ -144,10 +147,6 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
                         goto err_fences;
                 }
 
-                /* Sanity check */
-                for (j = 0; j < q->width; ++j)
-                        xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);
-
                 job->fence = &cf->base;
         }
 
@@ -229,9 +228,9 @@ bool xe_sched_job_started(struct xe_sched_job *job)
 {
         struct xe_lrc *lrc = job->q->lrc;
 
-        return !__dma_fence_is_later(xe_sched_job_seqno(job),
+        return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
                                      xe_lrc_start_seqno(lrc),
-                                     job->fence->ops);
+                                     dma_fence_array_first(job->fence)->ops);
 }
 
 bool xe_sched_job_completed(struct xe_sched_job *job)
@@ -243,8 +242,9 @@ bool xe_sched_job_completed(struct xe_sched_job *job)
          * parallel handshake is done.
          */
 
-        return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc),
-                                     job->fence->ops);
+        return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
+                                     xe_lrc_seqno(lrc),
+                                     dma_fence_array_first(job->fence)->ops);
 }
 
 void xe_sched_job_arm(struct xe_sched_job *job)
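Worth spelling out (an observation, not part of the commit message): __dma_fence_is_later() consults the fence ops (e.g. use_64bit_seqno) to decide how seqnos compare, and the seqnos compared here are LRC seqnos. For a parallel job, job->fence is now a dma_fence_array whose own ops are not the LRC fence ops, so the comparison takes the ops from the array's first child fence; for a non-parallel job, dma_fence_array_first() just returns job->fence itself. A minimal sketch of that lookup, assuming the upstream dma_fence_array_first() semantics (the helper name is hypothetical):

#include <linux/dma-fence-array.h>

#include "xe_sched_job_types.h"

/* Hypothetical helper, for illustration only. */
static const struct dma_fence_ops *example_lrc_fence_ops(struct xe_sched_job *job)
{
        /* First child of a fence array, or the fence itself otherwise. */
        return dma_fence_array_first(job->fence)->ops;
}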
5 changes: 5 additions & 0 deletions drivers/gpu/drm/xe/xe_sched_job.h
@@ -73,6 +73,11 @@ static inline u32 xe_sched_job_seqno(struct xe_sched_job *job)
         return job->fence->seqno;
 }
 
+static inline u32 xe_sched_job_lrc_seqno(struct xe_sched_job *job)
+{
+        return job->lrc_seqno;
+}
+
 static inline void
 xe_sched_job_add_migrate_flush(struct xe_sched_job *job, u32 flags)
 {
2 changes: 2 additions & 0 deletions drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -37,6 +37,8 @@ struct xe_sched_job {
                 /** @user_fence.value: write back value */
                 u64 value;
         } user_fence;
+        /** @lrc_seqno: LRC seqno */
+        u32 lrc_seqno;
         /** @migrate_flush_flags: Additional flush flags for migration jobs */
         u32 migrate_flush_flags;
         /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
7 changes: 5 additions & 2 deletions drivers/gpu/drm/xe/xe_trace.h
@@ -254,6 +254,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
 
                     TP_STRUCT__entry(
                              __field(u32, seqno)
+                             __field(u32, lrc_seqno)
                              __field(u16, guc_id)
                              __field(u32, guc_state)
                              __field(u32, flags)
@@ -264,6 +265,7 @@ DECLARE_EVENT_CLASS(xe_sched_job,
 
                     TP_fast_assign(
                            __entry->seqno = xe_sched_job_seqno(job);
+                           __entry->lrc_seqno = xe_sched_job_lrc_seqno(job);
                            __entry->guc_id = job->q->guc->id;
                            __entry->guc_state =
                                    atomic_read(&job->q->guc->state);
@@ -273,8 +275,9 @@ DECLARE_EVENT_CLASS(xe_sched_job,
                            __entry->batch_addr = (u64)job->batch_addr[0];
                     ),
 
-                    TP_printk("fence=%p, seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
-                              __entry->fence, __entry->seqno, __entry->guc_id,
+                    TP_printk("fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d",
+                              __entry->fence, __entry->seqno,
+                              __entry->lrc_seqno, __entry->guc_id,
                               __entry->batch_addr, __entry->guc_state,
                               __entry->flags, __entry->error)
             );
