diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 4f7f67a785acb..11e272422fb75 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -187,30 +187,34 @@ static void irq_execute_cb_hook(struct irq_work *wrk) irq_execute_cb(wrk); } -static void __notify_execute_cb(struct i915_request *rq) +static __always_inline void +__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk)) { struct execute_cb *cb, *cn; - lockdep_assert_held(&rq->lock); - - GEM_BUG_ON(!i915_request_is_active(rq)); if (llist_empty(&rq->execute_cb)) return; - llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode) - irq_work_queue(&cb->work); + llist_for_each_entry_safe(cb, cn, + llist_del_all(&rq->execute_cb), + work.llnode) + fn(&cb->work); +} - /* - * XXX Rollback on __i915_request_unsubmit() - * - * In the future, perhaps when we have an active time-slicing scheduler, - * it will be interesting to unsubmit parallel execution and remove - * busywaits from the GPU until their master is restarted. This is - * quite hairy, we have to carefully rollback the fence and do a - * preempt-to-idle cycle on the target engine, all the while the - * master execute_cb may refire. - */ - init_llist_head(&rq->execute_cb); +static void __notify_execute_cb_irq(struct i915_request *rq) +{ + __notify_execute_cb(rq, irq_work_queue); +} + +static bool irq_work_imm(struct irq_work *wrk) +{ + wrk->func(wrk); + return false; +} + +static void __notify_execute_cb_imm(struct i915_request *rq) +{ + __notify_execute_cb(rq, irq_work_imm); } static void free_capture_list(struct i915_request *request) @@ -257,9 +261,16 @@ static void remove_from_engine(struct i915_request *rq) locked = engine; } list_del_init(&rq->sched.link); + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags); + + /* Prevent further __await_execution() registering a cb, then flush */ + set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags); + spin_unlock_irq(&locked->active.lock); + + __notify_execute_cb_imm(rq); } bool i915_request_retire(struct i915_request *rq) @@ -271,6 +282,7 @@ bool i915_request_retire(struct i915_request *rq) GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); trace_i915_request_retire(rq); + i915_request_mark_complete(rq); /* * We know the GPU must have read the request to have @@ -288,15 +300,6 @@ bool i915_request_retire(struct i915_request *rq) __i915_request_fill(rq, POISON_FREE); rq->ring->head = rq->postfix; - /* - * We only loosely track inflight requests across preemption, - * and so we may find ourselves attempting to retire a _completed_ - * request that we have removed from the HW and put back on a run - * queue. - */ - remove_from_engine(rq); - - i915_request_mark_complete(rq); if (!i915_request_signaled(rq)) { spin_lock_irq(&rq->lock); dma_fence_signal_locked(&rq->fence); @@ -320,7 +323,6 @@ bool i915_request_retire(struct i915_request *rq) */ remove_from_engine(rq); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); - spin_unlock_irq(&rq->lock); __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ @@ -348,12 +350,6 @@ void i915_request_retire_upto(struct i915_request *rq) } while (i915_request_retire(tmp) && tmp != rq); } -static void __llist_add(struct llist_node *node, struct llist_head *head) -{ - node->next = head->first; - head->first = node; -} - static struct i915_request * const * __engine_active(struct intel_engine_cs *engine) { @@ -451,18 +447,24 @@ __await_execution(struct i915_request *rq, cb->work.func = irq_execute_cb_hook; } - spin_lock_irq(&signal->lock); - if (i915_request_is_active(signal) || __request_in_flight(signal)) { - if (hook) { - hook(rq, &signal->fence); - i915_request_put(signal); - } - i915_sw_fence_complete(cb->fence); - kmem_cache_free(global.slab_execute_cbs, cb); - } else { - __llist_add(&cb->work.llnode, &signal->execute_cb); + /* + * Register the callback first, then see if the signaler is already + * active. This ensures that if we race with the + * __notify_execute_cb from i915_request_submit() and we are not + * included in that list, we get a second bite of the cherry and + * execute it ourselves. After this point, a future + * i915_request_submit() will notify us. + * + * In i915_request_retire() we set the ACTIVE bit on a completed + * request (then flush the execute_cb). So by registering the + * callback first, then checking the ACTIVE bit, we serialise with + * the completed/retired request. + */ + if (llist_add(&cb->work.llnode, &signal->execute_cb)) { + if (i915_request_is_active(signal) || + __request_in_flight(signal)) + __notify_execute_cb_imm(signal); } - spin_unlock_irq(&signal->lock); return 0; } @@ -588,10 +590,19 @@ bool __i915_request_submit(struct i915_request *request) * preempt-to-idle cycle on the target engine, all the while the * master execute_cb may refire. */ - __notify_execute_cb(request); + __notify_execute_cb_irq(request); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - i915_request_enable_breadcrumb(request); + /* We may be recursing from the signal callback of another i915 fence */ + if (!i915_request_signaled(request)) { + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, + &request->fence.flags) && + !i915_request_enable_breadcrumb(request)) + intel_engine_signal_breadcrumbs(engine); + + spin_unlock(&request->lock); + } return result; }