Skip to content

Commit

Permalink
drm/i915/perf: Determine gen12 oa ctx offset at runtime
Browse files Browse the repository at this point in the history
Some SKUs of same gen12 platform may have different oactxctrl
offsets. For gen12, determine oactxctrl offsets at runtime.

v2: (Lionel)
- Move MI definitions to intel_gpu_commands.h
- Ensure __find_reg_in_lri does read past context image size

v3: (Ashutosh)
- Drop unnecessary use of double underscores
- fix find_reg_in_lri
- Return error if oa context offset is U32_MAX
- Error out if oa_ctx_ctrl_offset does not find offset

v4: (Ashutosh)
- Warn on odd MI LRI_LEN
- Remove unnecessary check for valid_oactxctrl_offset
- Drop valid_oactxctrl_offset macro

v5: Drop unrelated comment

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221026222102.5526-5-umesh.nerlige.ramappa@intel.com
  • Loading branch information
Umesh Nerlige Ramappa authored and John Harrison committed Oct 27, 2022
1 parent 2d9da58 commit a5c3a3c
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 25 deletions.
4 changes: 4 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_gpu_commands.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@
#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/

#define MI_OPCODE(x) (((x) >> 23) & 0x3f)
#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
#define MI_LRI_LEN(x) (((x) & 0xff) + 1)

/*
* 3D instructions used by the kernel
*/
Expand Down
146 changes: 122 additions & 24 deletions drivers/gpu/drm/i915/i915_perf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1356,6 +1356,74 @@ static int gen12_get_render_context_id(struct i915_perf_stream *stream)
return 0;
}

static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
{
u32 idx = *offset;
u32 len = min(MI_LRI_LEN(state[idx]) + idx, end);
bool found = false;

idx++;
for (; idx < len; idx += 2) {
if (state[idx] == reg) {
found = true;
break;
}
}

*offset = idx;
return found;
}

static u32 oa_context_image_offset(struct intel_context *ce, u32 reg)
{
u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4;
u32 *state = ce->lrc_reg_state;

for (offset = 0; offset < len; ) {
if (IS_MI_LRI_CMD(state[offset])) {
/*
* We expect reg-value pairs in MI_LRI command, so
* MI_LRI_LEN() should be even, if not, issue a warning.
*/
drm_WARN_ON(&ce->engine->i915->drm,
MI_LRI_LEN(state[offset]) & 0x1);

if (oa_find_reg_in_lri(state, reg, &offset, len))
break;
} else {
offset++;
}
}

return offset < len ? offset : U32_MAX;
}

static int set_oa_ctx_ctrl_offset(struct intel_context *ce)
{
i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base);
struct i915_perf *perf = &ce->engine->i915->perf;
u32 offset = perf->ctx_oactxctrl_offset;

/* Do this only once. Failure is stored as offset of U32_MAX */
if (offset)
goto exit;

offset = oa_context_image_offset(ce, i915_mmio_reg_offset(reg));
perf->ctx_oactxctrl_offset = offset;

drm_dbg(&ce->engine->i915->drm,
"%s oa ctx control at 0x%08x dword offset\n",
ce->engine->name, offset);

exit:
return offset && offset != U32_MAX ? 0 : -ENODEV;
}

static bool engine_supports_mi_query(struct intel_engine_cs *engine)
{
return engine->class == RENDER_CLASS;
}

/**
* oa_get_render_ctx_id - determine and hold ctx hw id
* @stream: An i915-perf stream opened for OA metrics
Expand All @@ -1375,6 +1443,21 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
if (IS_ERR(ce))
return PTR_ERR(ce);

if (engine_supports_mi_query(stream->engine)) {
/*
* We are enabling perf query here. If we don't find the context
* offset here, just return an error.
*/
ret = set_oa_ctx_ctrl_offset(ce);
if (ret) {
intel_context_unpin(ce);
drm_err(&stream->perf->i915->drm,
"Enabling perf query failed for %s\n",
stream->engine->name);
return ret;
}
}

switch (GRAPHICS_VER(ce->engine->i915)) {
case 7: {
/*
Expand Down Expand Up @@ -2406,10 +2489,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
int err;
struct intel_context *ce = stream->pinned_ctx;
u32 format = stream->oa_buffer.format;
u32 offset = stream->perf->ctx_oactxctrl_offset;
struct flex regs_context[] = {
{
GEN8_OACTXCONTROL,
stream->perf->ctx_oactxctrl_offset + 1,
offset + 1,
active ? GEN8_OA_COUNTER_RESUME : 0,
},
};
Expand All @@ -2434,12 +2518,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
},
};

/* Modify the context image of pinned context with regs_context*/
/* Modify the context image of pinned context with regs_context */
err = intel_context_lock_pinned(ce);
if (err)
return err;

err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
err = gen8_modify_context(ce, regs_context,
ARRAY_SIZE(regs_context));
intel_context_unlock_pinned(ce);
if (err)
return err;
Expand Down Expand Up @@ -2564,6 +2649,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
const struct i915_oa_config *oa_config,
struct i915_active *active)
{
u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
/* The MMIO offsets for Flex EU registers aren't contiguous */
const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
Expand All @@ -2574,7 +2660,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
},
{
GEN8_OACTXCONTROL,
stream->perf->ctx_oactxctrl_offset + 1,
ctx_oactxctrl + 1,
},
{ EU_PERF_CNTL0, ctx_flexeuN(0) },
{ EU_PERF_CNTL1, ctx_flexeuN(1) },
Expand Down Expand Up @@ -4543,6 +4629,37 @@ static void oa_init_supported_formats(struct i915_perf *perf)
}
}

static void i915_perf_init_info(struct drm_i915_private *i915)
{
struct i915_perf *perf = &i915->perf;

switch (GRAPHICS_VER(i915)) {
case 8:
perf->ctx_oactxctrl_offset = 0x120;
perf->ctx_flexeu0_offset = 0x2ce;
perf->gen8_valid_ctx_bit = BIT(25);
break;
case 9:
perf->ctx_oactxctrl_offset = 0x128;
perf->ctx_flexeu0_offset = 0x3de;
perf->gen8_valid_ctx_bit = BIT(16);
break;
case 11:
perf->ctx_oactxctrl_offset = 0x124;
perf->ctx_flexeu0_offset = 0x78e;
perf->gen8_valid_ctx_bit = BIT(16);
break;
case 12:
/*
* Calculate offset at runtime in oa_pin_context for gen12 and
* cache the value in perf->ctx_oactxctrl_offset.
*/
break;
default:
MISSING_CASE(GRAPHICS_VER(i915));
}
}

/**
* i915_perf_init - initialize i915-perf state on module bind
* @i915: i915 device instance
Expand Down Expand Up @@ -4581,6 +4698,7 @@ void i915_perf_init(struct drm_i915_private *i915)
* execlist mode by default.
*/
perf->ops.read = gen8_oa_read;
i915_perf_init_info(i915);

if (IS_GRAPHICS_VER(i915, 8, 9)) {
perf->ops.is_valid_b_counter_reg =
Expand All @@ -4600,18 +4718,6 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->ops.enable_metric_set = gen8_enable_metric_set;
perf->ops.disable_metric_set = gen8_disable_metric_set;
perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

if (GRAPHICS_VER(i915) == 8) {
perf->ctx_oactxctrl_offset = 0x120;
perf->ctx_flexeu0_offset = 0x2ce;

perf->gen8_valid_ctx_bit = BIT(25);
} else {
perf->ctx_oactxctrl_offset = 0x128;
perf->ctx_flexeu0_offset = 0x3de;

perf->gen8_valid_ctx_bit = BIT(16);
}
} else if (GRAPHICS_VER(i915) == 11) {
perf->ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
Expand All @@ -4625,11 +4731,6 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->ops.enable_metric_set = gen8_enable_metric_set;
perf->ops.disable_metric_set = gen11_disable_metric_set;
perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

perf->ctx_oactxctrl_offset = 0x124;
perf->ctx_flexeu0_offset = 0x78e;

perf->gen8_valid_ctx_bit = BIT(16);
} else if (GRAPHICS_VER(i915) == 12) {
perf->ops.is_valid_b_counter_reg =
gen12_is_valid_b_counter_addr;
Expand All @@ -4643,9 +4744,6 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->ops.enable_metric_set = gen12_enable_metric_set;
perf->ops.disable_metric_set = gen12_disable_metric_set;
perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;

perf->ctx_flexeu0_offset = 0;
perf->ctx_oactxctrl_offset = 0x144;
}
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/i915_perf_oa_regs.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
#define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0)

#define GEN12_OACTXCONTROL _MMIO(0x2360)
#define GEN12_OACTXCONTROL(base) _MMIO((base) + 0x360)
#define GEN12_OAR_OASTATUS _MMIO(0x2968)

/* Gen12 OAG unit */
Expand Down

0 comments on commit a5c3a3c

Please sign in to comment.