Skip to content

Commit

Permalink
drm/i915/gt: Eliminate the trylock for reading a timeline's hwsp
Browse files Browse the repository at this point in the history
As we stash a pointer to the HWSP cacheline on the request, when reading
it we only need to confirm that the cacheline is still valid by checking
that the request and timeline are still intact.

v2: Protect hwsp_cacheline with RCU

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191217011659.3092130-1-chris@chris-wilson.co.uk
  • Loading branch information
Chris Wilson committed Dec 17, 2019
1 parent e14177f commit 85bedbf
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 46 deletions.
64 changes: 24 additions & 40 deletions drivers/gpu/drm/i915/gt/intel_timeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))

#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS

struct intel_timeline_hwsp {
struct intel_gt *gt;
struct intel_gt_timelines *gt_timelines;
Expand All @@ -23,14 +26,6 @@ struct intel_timeline_hwsp {
u64 free_bitmap;
};

struct intel_timeline_cacheline {
struct i915_active active;
struct intel_timeline_hwsp *hwsp;
void *vaddr;
#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS
};

static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
Expand Down Expand Up @@ -133,7 +128,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));

i915_active_fini(&cl->active);
kfree(cl);
kfree_rcu(cl, rcu);
}

__i915_active_call
Expand Down Expand Up @@ -514,46 +509,35 @@ int intel_timeline_read_hwsp(struct i915_request *from,
struct i915_request *to,
u32 *hwsp)
{
struct intel_timeline *tl;
struct intel_timeline_cacheline *cl;
int err;

GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));

rcu_read_lock();
tl = rcu_dereference(from->timeline);
if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref))
tl = NULL;
cl = rcu_dereference(from->hwsp_cacheline);
if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
goto unlock; /* seqno wrapped and completed! */
if (unlikely(i915_request_completed(from)))
goto release;
rcu_read_unlock();
if (!tl) /* already completed */
return 1;

GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl);

err = -EAGAIN;
if (mutex_trylock(&tl->mutex)) {
struct intel_timeline_cacheline *cl = from->hwsp_cacheline;

if (i915_request_completed(from)) {
err = 1;
goto unlock;
}
err = cacheline_ref(cl, to);
if (err)
goto out;

err = cacheline_ref(cl, to);
if (err)
goto unlock;
*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;

if (likely(cl == tl->hwsp_cacheline)) {
*hwsp = tl->hwsp_offset;
} else { /* across a seqno wrap, recover the original offset */
*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
CACHELINE_BYTES;
}
out:
i915_active_release(&cl->active);
return err;

release:
i915_active_release(&cl->active);
unlock:
mutex_unlock(&tl->mutex);
}
intel_timeline_put(tl);

return err;
rcu_read_unlock();
return 1;
}

void intel_timeline_unpin(struct intel_timeline *tl)
Expand Down
12 changes: 11 additions & 1 deletion drivers/gpu/drm/i915/gt/intel_timeline_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/types.h>

#include "i915_active_types.h"

struct drm_i915_private;
struct i915_vma;
struct intel_timeline_cacheline;
struct i915_syncmap;
struct intel_timeline_hwsp;

struct intel_timeline {
u64 fence_context;
Expand Down Expand Up @@ -87,4 +88,13 @@ struct intel_timeline {
struct rcu_head rcu;
};

/*
 * Bookkeeping for one cacheline handed out from a HWSP allocation.
 *
 * Requests keep an __rcu pointer to this structure (rq->hwsp_cacheline),
 * and the structure is released with kfree_rcu(), so a reader that loads
 * the pointer with rcu_dereference() under rcu_read_lock() can safely
 * inspect it and then try to take an active reference.
 */
struct intel_timeline_cacheline {
/*
 * Activity tracker for the cacheline. intel_timeline_read_hwsp() takes it
 * with i915_active_acquire_if_busy() and drops it with
 * i915_active_release(); it is torn down with i915_active_fini() before
 * the structure is freed.
 */
struct i915_active active;

/* HWSP allocation this cacheline belongs to; its vma gives the GGTT base. */
struct intel_timeline_hwsp *hwsp;
/*
 * Pointer with extra state packed into its low CACHELINE_BITS bits.
 * ptr_unmask_bits(vaddr, CACHELINE_BITS) yields the value that is used as
 * the cacheline index, both when returning the cacheline to the HWSP and
 * when computing the GGTT offset (index * CACHELINE_BYTES).
 */
void *vaddr;

/* Callback head consumed by kfree_rcu() to defer the final free. */
struct rcu_head rcu;
};

#endif /* __I915_TIMELINE_TYPES_H__ */
4 changes: 2 additions & 2 deletions drivers/gpu/drm/i915/i915_request.c
Original file line number Diff line number Diff line change
Expand Up @@ -655,9 +655,9 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
rq->execution_mask = ce->engine->mask;
rq->flags = 0;

rcu_assign_pointer(rq->timeline, tl);
RCU_INIT_POINTER(rq->timeline, tl);
RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
rq->hwsp_seqno = tl->hwsp_seqno;
rq->hwsp_cacheline = tl->hwsp_cacheline;

rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */

Expand Down
5 changes: 2 additions & 3 deletions drivers/gpu/drm/i915/i915_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include "gt/intel_context_types.h"
#include "gt/intel_engine_types.h"
#include "gt/intel_timeline_types.h"

#include "i915_gem.h"
#include "i915_scheduler.h"
Expand All @@ -41,8 +42,6 @@
struct drm_file;
struct drm_i915_gem_object;
struct i915_request;
struct intel_timeline;
struct intel_timeline_cacheline;

struct i915_capture_list {
struct i915_capture_list *next;
Expand Down Expand Up @@ -183,7 +182,7 @@ struct i915_request {
* inside the timeline's HWSP vma, but it is only valid while this
* request has not completed and guarded by the timeline mutex.
*/
struct intel_timeline_cacheline *hwsp_cacheline;
struct intel_timeline_cacheline __rcu *hwsp_cacheline;

/** Position in the ring of the start of the request */
u32 head;
Expand Down

0 comments on commit 85bedbf

Please sign in to comment.