diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index a390f0813c8b6..7111bae759f3f 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -110,6 +110,8 @@ enum { #define XEHP_SW_CTX_ID_WIDTH 16 #define XEHP_SW_COUNTER_SHIFT 58 #define XEHP_SW_COUNTER_WIDTH 6 +#define GEN12_GUC_SW_CTX_ID_SHIFT 39 +#define GEN12_GUC_SW_CTX_ID_WIDTH 16 static inline void lrc_runtime_start(struct intel_context *ce) { diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 15816df916c78..255335868b6ab 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1231,6 +1231,128 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) return stream->pinned_ctx; } +static int +__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset) +{ + u32 *cs, cmd; + + cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (GRAPHICS_VER(rq->engine->i915) >= 8) + cmd++; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = cmd; + *cs++ = i915_mmio_reg_offset(reg); + *cs++ = ggtt_offset; + *cs++ = 0; + + intel_ring_advance(rq, cs); + + return 0; +} + +static int +__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset) +{ + struct i915_request *rq; + int err; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + + err = __store_reg_to_mem(rq, reg, ggtt_offset); + + i915_request_add(rq); + if (!err && i915_request_wait(rq, 0, HZ / 2) < 0) + err = -ETIME; + + i915_request_put(rq); + + return err; +} + +static int +gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id) +{ + struct i915_vma *scratch; + u32 *val; + int err; + + scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4); + if (IS_ERR(scratch)) + return PTR_ERR(scratch); + + err = i915_vma_sync(scratch); + if (err) + goto err_scratch; + + err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base), + i915_ggtt_offset(scratch)); + if (err) + goto err_scratch; + + val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB); + if (IS_ERR(val)) { + err = PTR_ERR(val); + goto err_scratch; + } + + *ctx_id = *val; + i915_gem_object_unpin_map(scratch->obj); + +err_scratch: + i915_vma_unpin_and_release(&scratch, 0); + return err; +} + +/* + * For execlist mode of submission, pick an unused context id + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts + * XXX_MAX_CONTEXT_HW_ID is used by idle context + * + * For GuC mode of submission read context id from the upper dword of the + * EXECLIST_STATUS register. Note that we read this value only once and expect + * that the value stays fixed for the entire OA use case. There are cases where + * GuC KMD implementation may deregister a context to reuse it's context id, but + * we prevent that from happening to the OA context by pinning it. + */ +static int gen12_get_render_context_id(struct i915_perf_stream *stream) +{ + u32 ctx_id, mask; + int ret; + + if (intel_engine_uses_guc(stream->engine)) { + ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id); + if (ret) + return ret; + + mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) << + (GEN12_GUC_SW_CTX_ID_SHIFT - 32); + } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) { + ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) << + (XEHP_SW_CTX_ID_SHIFT - 32); + + mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) << + (XEHP_SW_CTX_ID_SHIFT - 32); + } else { + ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << + (GEN11_SW_CTX_ID_SHIFT - 32); + + mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << + (GEN11_SW_CTX_ID_SHIFT - 32); + } + stream->specific_ctx_id = ctx_id & mask; + stream->specific_ctx_id_mask = mask; + + return 0; +} + /** * oa_get_render_ctx_id - determine and hold ctx hw id * @stream: An i915-perf stream opened for OA metrics @@ -1244,6 +1366,7 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { struct intel_context *ce; + int ret = 0; ce = oa_pin_context(stream); if (IS_ERR(ce)) @@ -1290,24 +1413,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) case 11: case 12: - if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) { - stream->specific_ctx_id_mask = - ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) << - (XEHP_SW_CTX_ID_SHIFT - 32); - stream->specific_ctx_id = - (XEHP_MAX_CONTEXT_HW_ID - 1) << - (XEHP_SW_CTX_ID_SHIFT - 32); - } else { - stream->specific_ctx_id_mask = - ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); - /* - * Pick an unused context id - * 0 - BITS_PER_LONG are used by other contexts - * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context - */ - stream->specific_ctx_id = - (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); - } + ret = gen12_get_render_context_id(stream); break; default: @@ -1321,7 +1427,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) stream->specific_ctx_id, stream->specific_ctx_id_mask); - return 0; + return ret; } /**