Skip to content

Commit

Permalink
drm/i915/perf: Fix OA filtering logic for GuC mode
Browse files Browse the repository at this point in the history
With GuC mode of submission, GuC is in control of defining the context
id field that is part of the OA reports. To filter reports, UMD and KMD
must know what sw context id was chosen by GuC. There is no interface
between KMD and GuC to determine this, so read the upper-dword of
EXECLIST_STATUS to filter/squash OA reports for the specific context.

v2: Explain guc id stealing w.r.t OA use case

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221026222102.5526-2-umesh.nerlige.ramappa@intel.com
  • Loading branch information
Umesh Nerlige Ramappa authored and John Harrison committed Oct 27, 2022
1 parent a8a4f04 commit 682aa43
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 19 deletions.
2 changes: 2 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_lrc.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ enum {
#define XEHP_SW_CTX_ID_WIDTH 16
#define XEHP_SW_COUNTER_SHIFT 58
#define XEHP_SW_COUNTER_WIDTH 6
/*
 * Location of the sw context id field when GuC submission is in use: bit
 * offset and width in bits. The shift exceeds 32; code that works on the
 * upper dword only (e.g. the OA context id mask in i915_perf.c) subtracts
 * 32 from it.
 */
#define GEN12_GUC_SW_CTX_ID_SHIFT 39
#define GEN12_GUC_SW_CTX_ID_WIDTH 16

static inline void lrc_runtime_start(struct intel_context *ce)
{
Expand Down
144 changes: 125 additions & 19 deletions drivers/gpu/drm/i915/i915_perf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1231,6 +1231,128 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
return stream->pinned_ctx;
}

/*
 * Emit a four-dword MI_STORE_REGISTER_MEM command (with the
 * MI_SRM_LRM_GLOBAL_GTT flag set) into the ring of @rq, naming @reg as the
 * register and @ggtt_offset as the destination.
 *
 * Returns 0 on success, or the error carried by intel_ring_begin() when ring
 * space could not be obtained.
 */
static int
__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset)
{
	u32 srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	u32 *batch;

	/*
	 * The command dword is bumped by one on graphics version 8 and later.
	 * NOTE(review): presumably this selects the longer command form with
	 * an extra address dword - confirm against the MI command definitions.
	 */
	if (GRAPHICS_VER(rq->engine->i915) >= 8)
		srm += 1;

	batch = intel_ring_begin(rq, 4);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	batch[0] = srm;
	batch[1] = i915_mmio_reg_offset(reg);
	batch[2] = ggtt_offset;
	batch[3] = 0;

	intel_ring_advance(rq, batch + 4);

	return 0;
}

/*
 * Create a request on @ce that stores @reg to @ggtt_offset, submit it and
 * wait for it with a timeout argument of HZ / 2.
 *
 * Returns 0 on success, the error from i915_request_create() or
 * __store_reg_to_mem() on failure, or -ETIME when the wait reports an error.
 */
static int
__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
{
	struct i915_request *req;
	int ret;

	req = i915_request_create(ce);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* Take a reference that is dropped only after the wait below. */
	i915_request_get(req);

	ret = __store_reg_to_mem(req, reg, ggtt_offset);

	/* The request is added regardless of whether emission succeeded. */
	i915_request_add(req);

	/* Only wait when the store command was actually emitted. */
	if (ret == 0) {
		if (i915_request_wait(req, 0, HZ / 2) < 0)
			ret = -ETIME;
	}

	i915_request_put(req);

	return ret;
}

/*
 * Read RING_EXECLIST_STATUS_HI of @ce's engine through a request submitted on
 * @ce and hand the raw dword back in @ctx_id. No masking is applied here.
 *
 * A pinned scratch vma (size argument 4) in the engine's GGTT serves as the
 * landing spot for the register value; it is unpinned and released on every
 * path once it has been created.
 *
 * Returns 0 on success with *@ctx_id written, or a negative error code, in
 * which case *@ctx_id is left untouched.
 */
static int
gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
{
	struct i915_vma *vma;
	u32 *map;
	int ret;

	vma = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ret = i915_vma_sync(vma);

	if (!ret)
		ret = __read_reg(ce,
				 RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
				 i915_ggtt_offset(vma));

	if (!ret) {
		/* Map the backing object to fetch the stored value. */
		map = i915_gem_object_pin_map_unlocked(vma->obj, I915_MAP_WB);
		if (IS_ERR(map)) {
			ret = PTR_ERR(map);
		} else {
			*ctx_id = map[0];
			i915_gem_object_unpin_map(vma->obj);
		}
	}

	i915_vma_unpin_and_release(&vma, 0);

	return ret;
}

/*
 * Work out the context id, and the mask selecting it, that OA report
 * filtering uses for @stream. Both are expressed relative to the upper dword,
 * hence the "- 32" applied to every shift.
 *
 * Execlist submission: pick an unused context id.
 *   0 - (NUM_CONTEXT_TAG - 1) are used by other contexts
 *   XXX_MAX_CONTEXT_HW_ID is used by the idle context
 *
 * GuC submission: the id is read from the upper dword of the EXECLIST_STATUS
 * register. It is read only once and is expected to stay fixed for the whole
 * OA use case. The GuC KMD implementation may in some cases deregister a
 * context in order to reuse its context id, but that is prevented for the OA
 * context by pinning it.
 *
 * Returns 0 on success after updating stream->specific_ctx_id and
 * stream->specific_ctx_id_mask, or the error from gen12_guc_sw_ctx_id(), in
 * which case @stream is not modified.
 */
static int gen12_get_render_context_id(struct i915_perf_stream *stream)
{
	u32 id_mask, id;
	int err;

	if (!intel_engine_uses_guc(stream->engine)) {
		if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
			id_mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
				  (XEHP_SW_CTX_ID_SHIFT - 32);

			id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
			     (XEHP_SW_CTX_ID_SHIFT - 32);
		} else {
			id_mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
				  (GEN11_SW_CTX_ID_SHIFT - 32);

			id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
			     (GEN11_SW_CTX_ID_SHIFT - 32);
		}
	} else {
		/* GuC picked the id; fetch what the hardware reports. */
		err = gen12_guc_sw_ctx_id(stream->pinned_ctx, &id);
		if (err)
			return err;

		id_mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
			  (GEN12_GUC_SW_CTX_ID_SHIFT - 32);
	}

	stream->specific_ctx_id_mask = id_mask;
	/* Keep only the bits that belong to the context id field. */
	stream->specific_ctx_id = id & id_mask;

	return 0;
}

/**
* oa_get_render_ctx_id - determine and hold ctx hw id
* @stream: An i915-perf stream opened for OA metrics
Expand All @@ -1244,6 +1366,7 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
struct intel_context *ce;
int ret = 0;

ce = oa_pin_context(stream);
if (IS_ERR(ce))
Expand Down Expand Up @@ -1290,24 +1413,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)

case 11:
case 12:
if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
stream->specific_ctx_id_mask =
((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
(XEHP_SW_CTX_ID_SHIFT - 32);
stream->specific_ctx_id =
(XEHP_MAX_CONTEXT_HW_ID - 1) <<
(XEHP_SW_CTX_ID_SHIFT - 32);
} else {
stream->specific_ctx_id_mask =
((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
/*
* Pick an unused context id
* 0 - BITS_PER_LONG are used by other contexts
* GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
*/
stream->specific_ctx_id =
(GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
}
ret = gen12_get_render_context_id(stream);
break;

default:
Expand All @@ -1321,7 +1427,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
stream->specific_ctx_id,
stream->specific_ctx_id_mask);

return 0;
return ret;
}

/**
Expand Down

0 comments on commit 682aa43

Please sign in to comment.