diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 6b3013d20851b..c643eec4dca0f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1822,7 +1822,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 	dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
 	dma_resv_unlock(shadow->resv);
 
-	dma_fence_work_commit(&pw->base);
+	dma_fence_work_commit_imm(&pw->base);
 	return 0;
 
 err_batch_unlock:
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c
index 997b2998f1f21..a3a81bb8f2c36 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c
@@ -38,7 +38,10 @@ fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 
 		if (!f->dma.error) {
 			dma_fence_get(&f->dma);
-			queue_work(system_unbound_wq, &f->work);
+			if (test_bit(DMA_FENCE_WORK_IMM, &f->dma.flags))
+				fence_work(&f->work);
+			else
+				queue_work(system_unbound_wq, &f->work);
 		} else {
 			fence_complete(f);
 		}
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.h b/drivers/gpu/drm/i915/i915_sw_fence_work.h
index 3a22b287e2019..2c409f11c5c59 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.h
@@ -32,6 +32,10 @@ struct dma_fence_work {
 	const struct dma_fence_work_ops *ops;
 };
 
+enum {
+	DMA_FENCE_WORK_IMM = DMA_FENCE_FLAG_USER_BITS,
+};
+
 void dma_fence_work_init(struct dma_fence_work *f,
 			 const struct dma_fence_work_ops *ops);
 int dma_fence_work_chain(struct dma_fence_work *f, struct dma_fence *signal);
@@ -41,4 +45,23 @@ static inline void dma_fence_work_commit(struct dma_fence_work *f)
 	i915_sw_fence_commit(&f->chain);
 }
 
+/**
+ * dma_fence_work_commit_imm: Commit the fence, and if possible execute locally.
+ * @f: the fenced worker
+ *
+ * Instead of always scheduling a worker to execute the callback (see
+ * dma_fence_work_commit()), we try to execute the callback immediately in
+ * the local context. It is required that the fence be committed before it
+ * is published, and that no other threads try to tamper with the number
+ * of asynchronous waits on the fence (or else the callback will be
+ * executed in the wrong context, i.e. not the caller's).
+ */
+static inline void dma_fence_work_commit_imm(struct dma_fence_work *f)
+{
+	if (atomic_read(&f->chain.pending) <= 1)
+		__set_bit(DMA_FENCE_WORK_IMM, &f->dma.flags);
+
+	dma_fence_work_commit(f);
+}
+
 #endif /* I915_SW_FENCE_WORK_H */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 08699fa069aa6..191577a983900 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -980,7 +980,7 @@ int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	mutex_unlock(&vma->vm->mutex);
 err_fence:
 	if (work)
-		dma_fence_work_commit(&work->base);
+		dma_fence_work_commit_imm(&work->base);
 	if (wakeref)
 		intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
 err_pages:
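
For orientation, a minimal caller-side sketch of the new helper (not part of the patch): my_work, my_submit, my_ops and the prereq parameter are hypothetical names, and only the declarations visible in i915_sw_fence_work.h above are assumed.

	/* Hypothetical usage sketch, not from the patch. */
	#include "i915_sw_fence_work.h"

	struct my_work {
		struct dma_fence_work base;
		/* driver-private payload consumed by the ops work callback */
	};

	/* Assumed to be defined elsewhere with real callbacks filled in. */
	extern const struct dma_fence_work_ops my_ops;

	static void my_submit(struct my_work *w, struct dma_fence *prereq)
	{
		dma_fence_work_init(&w->base, &my_ops);

		/*
		 * Optionally order the work behind a prerequisite fence
		 * (error handling elided for brevity).
		 */
		if (prereq)
			dma_fence_work_chain(&w->base, prereq);

		/*
		 * If no asynchronous wait is still outstanding at this point,
		 * chain.pending is <= 1, DMA_FENCE_WORK_IMM is set and the
		 * work callback runs immediately in this thread; otherwise
		 * fence_notify() queues it to system_unbound_wq, exactly as
		 * dma_fence_work_commit() would.
		 */
		dma_fence_work_commit_imm(&w->base);
	}

Whether the immediate path is safe depends on the contract in the kernel-doc above: the fence must be committed before it is published, and no other thread may add waits concurrently, otherwise the callback runs in an unexpected context.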