Skip to content

Commit

Permalink
drm/i915/perf: Schedule oa_config after modifying the contexts
Browse files Browse the repository at this point in the history
We wish that the scheduler emit the context modification commands prior
to enabling the oa_config, for which we must explicitly inform it of the
ordering constraints. This is especially important as we now wait for
the final oa_config setup to be completed; since this wait may be on a
distinct context from the state modifications, we need that command
packet to always be last in the queue.

We borrow the i915_active for its ability to track multiple timelines
and the last dma_fence on each — in effect, a flexible dma_resv. Keeping
track of each dma_fence is important for us so that we can efficiently
schedule the requests and reprioritise them as required.

Reported-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200327112212.16046-3-chris@chris-wilson.co.uk
  • Loading branch information
Chris Wilson committed Mar 30, 2020
1 parent 229007e commit d7d50f8
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 57 deletions.
154 changes: 99 additions & 55 deletions drivers/gpu/drm/i915/i915_perf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1916,18 +1916,19 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
return i915_vma_get(oa_bo->vma);
}

static struct i915_request *
static int
emit_oa_config(struct i915_perf_stream *stream,
struct i915_oa_config *oa_config,
struct intel_context *ce)
struct intel_context *ce,
struct i915_active *active)
{
struct i915_request *rq;
struct i915_vma *vma;
int err;

vma = get_oa_vma(stream, oa_config);
if (IS_ERR(vma))
return ERR_CAST(vma);
return PTR_ERR(vma);

err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (err)
Expand All @@ -1941,6 +1942,18 @@ emit_oa_config(struct i915_perf_stream *stream,
goto err_vma_unpin;
}

if (!IS_ERR_OR_NULL(active)) {
/* After all individual context modifications */
err = i915_request_await_active(rq, active,
I915_ACTIVE_AWAIT_ALL);
if (err)
goto err_add_request;

err = i915_active_add_request(active, rq);
if (err)
goto err_add_request;
}

i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
if (!err)
Expand All @@ -1955,23 +1968,23 @@ emit_oa_config(struct i915_perf_stream *stream,
if (err)
goto err_add_request;

i915_request_get(rq);
err_add_request:
i915_request_add(rq);
err_vma_unpin:
i915_vma_unpin(vma);
err_vma_put:
i915_vma_put(vma);
return err ? ERR_PTR(err) : rq;
return err;
}

static struct intel_context *oa_context(struct i915_perf_stream *stream)
{
return stream->pinned_ctx ?: stream->engine->kernel_context;
}

static struct i915_request *
hsw_enable_metric_set(struct i915_perf_stream *stream)
static int
hsw_enable_metric_set(struct i915_perf_stream *stream,
struct i915_active *active)
{
struct intel_uncore *uncore = stream->uncore;

Expand All @@ -1990,7 +2003,9 @@ hsw_enable_metric_set(struct i915_perf_stream *stream)
intel_uncore_rmw(uncore, GEN6_UCGCTL1,
0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);

return emit_oa_config(stream, stream->oa_config, oa_context(stream));
return emit_oa_config(stream,
stream->oa_config, oa_context(stream),
active);
}

static void hsw_disable_metric_set(struct i915_perf_stream *stream)
Expand Down Expand Up @@ -2137,8 +2152,10 @@ static int gen8_modify_context(struct intel_context *ce,
return err;
}

static int gen8_modify_self(struct intel_context *ce,
const struct flex *flex, unsigned int count)
static int
gen8_modify_self(struct intel_context *ce,
const struct flex *flex, unsigned int count,
struct i915_active *active)
{
struct i915_request *rq;
int err;
Expand All @@ -2149,8 +2166,17 @@ static int gen8_modify_self(struct intel_context *ce,
if (IS_ERR(rq))
return PTR_ERR(rq);

if (!IS_ERR_OR_NULL(active)) {
err = i915_active_add_request(active, rq);
if (err)
goto err_add_request;
}

err = gen8_load_flex(rq, ce, flex, count);
if (err)
goto err_add_request;

err_add_request:
i915_request_add(rq);
return err;
}
Expand Down Expand Up @@ -2184,7 +2210,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
return err;
}

static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
static int gen12_configure_oar_context(struct i915_perf_stream *stream,
struct i915_active *active)
{
int err;
struct intel_context *ce = stream->pinned_ctx;
Expand All @@ -2193,7 +2220,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
{
GEN8_OACTXCONTROL,
stream->perf->ctx_oactxctrl_offset + 1,
enable ? GEN8_OA_COUNTER_RESUME : 0,
active ? GEN8_OA_COUNTER_RESUME : 0,
},
};
/* Offsets in regs_lri are not used since this configuration is only
Expand All @@ -2205,13 +2232,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
GEN12_OAR_OACONTROL,
GEN12_OAR_OACONTROL_OFFSET + 1,
(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
(active ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
},
{
RING_CONTEXT_CONTROL(ce->engine->mmio_base),
CTX_CONTEXT_CONTROL,
_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
enable ?
active ?
GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
0)
},
Expand All @@ -2228,7 +2255,7 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
return err;

/* Apply regs_lri using LRI with pinned context */
return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active);
}

/*
Expand Down Expand Up @@ -2256,9 +2283,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool ena
* Note: it's only the RCS/Render context that has any OA state.
* Note: the first flex register passed must always be R_PWR_CLK_STATE
*/
static int oa_configure_all_contexts(struct i915_perf_stream *stream,
struct flex *regs,
size_t num_regs)
static int
oa_configure_all_contexts(struct i915_perf_stream *stream,
struct flex *regs,
size_t num_regs,
struct i915_active *active)
{
struct drm_i915_private *i915 = stream->perf->i915;
struct intel_engine_cs *engine;
Expand Down Expand Up @@ -2315,16 +2344,18 @@ static int oa_configure_all_contexts(struct i915_perf_stream *stream,

regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);

err = gen8_modify_self(ce, regs, num_regs);
err = gen8_modify_self(ce, regs, num_regs, active);
if (err)
return err;
}

return 0;
}

static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
const struct i915_oa_config *oa_config)
static int
gen12_configure_all_contexts(struct i915_perf_stream *stream,
const struct i915_oa_config *oa_config,
struct i915_active *active)
{
struct flex regs[] = {
{
Expand All @@ -2333,11 +2364,15 @@ static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
},
};

return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
return oa_configure_all_contexts(stream,
regs, ARRAY_SIZE(regs),
active);
}

static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
const struct i915_oa_config *oa_config)
static int
lrc_configure_all_contexts(struct i915_perf_stream *stream,
const struct i915_oa_config *oa_config,
struct i915_active *active)
{
/* The MMIO offsets for Flex EU registers aren't contiguous */
const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
Expand Down Expand Up @@ -2370,11 +2405,14 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
for (i = 2; i < ARRAY_SIZE(regs); i++)
regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);

return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
return oa_configure_all_contexts(stream,
regs, ARRAY_SIZE(regs),
active);
}

static struct i915_request *
gen8_enable_metric_set(struct i915_perf_stream *stream)
static int
gen8_enable_metric_set(struct i915_perf_stream *stream,
struct i915_active *active)
{
struct intel_uncore *uncore = stream->uncore;
struct i915_oa_config *oa_config = stream->oa_config;
Expand Down Expand Up @@ -2414,11 +2452,13 @@ gen8_enable_metric_set(struct i915_perf_stream *stream)
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
ret = lrc_configure_all_contexts(stream, oa_config);
ret = lrc_configure_all_contexts(stream, oa_config, active);
if (ret)
return ERR_PTR(ret);
return ret;

return emit_oa_config(stream, oa_config, oa_context(stream));
return emit_oa_config(stream,
stream->oa_config, oa_context(stream),
active);
}

static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
Expand All @@ -2428,8 +2468,9 @@ static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
0 : GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
}

static struct i915_request *
gen12_enable_metric_set(struct i915_perf_stream *stream)
static int
gen12_enable_metric_set(struct i915_perf_stream *stream,
struct i915_active *active)
{
struct intel_uncore *uncore = stream->uncore;
struct i915_oa_config *oa_config = stream->oa_config;
Expand Down Expand Up @@ -2458,30 +2499,32 @@ gen12_enable_metric_set(struct i915_perf_stream *stream)
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
ret = gen12_configure_all_contexts(stream, oa_config);
ret = gen12_configure_all_contexts(stream, oa_config, active);
if (ret)
return ERR_PTR(ret);
return ret;

/*
* For Gen12, performance counters are context
* saved/restored. Only enable it for the context that
* requested this.
*/
if (stream->ctx) {
ret = gen12_configure_oar_context(stream, true);
ret = gen12_configure_oar_context(stream, active);
if (ret)
return ERR_PTR(ret);
return ret;
}

return emit_oa_config(stream, oa_config, oa_context(stream));
return emit_oa_config(stream,
stream->oa_config, oa_context(stream),
active);
}

static void gen8_disable_metric_set(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;

/* Reset all contexts' slices/subslices configurations. */
lrc_configure_all_contexts(stream, NULL);
lrc_configure_all_contexts(stream, NULL, NULL);

intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0);
}
Expand All @@ -2491,7 +2534,7 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream)
struct intel_uncore *uncore = stream->uncore;

/* Reset all contexts' slices/subslices configurations. */
lrc_configure_all_contexts(stream, NULL);
lrc_configure_all_contexts(stream, NULL, NULL);

/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
Expand All @@ -2502,11 +2545,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
struct intel_uncore *uncore = stream->uncore;

/* Reset all contexts' slices/subslices configurations. */
gen12_configure_all_contexts(stream, NULL);
gen12_configure_all_contexts(stream, NULL, NULL);

/* disable the context save/restore or OAR counters */
if (stream->ctx)
gen12_configure_oar_context(stream, false);
gen12_configure_oar_context(stream, NULL);

/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
Expand Down Expand Up @@ -2680,16 +2723,19 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = {

static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream)
{
struct i915_request *rq;
struct i915_active *active;
int err;

rq = stream->perf->ops.enable_metric_set(stream);
if (IS_ERR(rq))
return PTR_ERR(rq);
active = i915_active_create();
if (!active)
return -ENOMEM;

i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
i915_request_put(rq);
err = stream->perf->ops.enable_metric_set(stream, active);
if (err == 0)
__i915_active_wait(active, TASK_UNINTERRUPTIBLE);

return 0;
i915_active_put(active);
return err;
}

static void
Expand Down Expand Up @@ -3171,7 +3217,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
return -EINVAL;

if (config != stream->oa_config) {
struct i915_request *rq;
int err;

/*
* If OA is bound to a specific context, emit the
Expand All @@ -3182,13 +3228,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
* When set globally, we use a low priority kernel context,
* so it will effectively take effect when idle.
*/
rq = emit_oa_config(stream, config, oa_context(stream));
if (!IS_ERR(rq)) {
err = emit_oa_config(stream, config, oa_context(stream), NULL);
if (!err)
config = xchg(&stream->oa_config, config);
i915_request_put(rq);
} else {
ret = PTR_ERR(rq);
}
else
ret = err;
}

i915_oa_config_put(config);
Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/i915/i915_perf_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

struct drm_i915_private;
struct file;
struct i915_active;
struct i915_gem_context;
struct i915_perf;
struct i915_vma;
Expand Down Expand Up @@ -340,8 +341,8 @@ struct i915_oa_ops {
* counter reports being sampled. May apply system constraints such as
* disabling EU clock gating as required.
*/
struct i915_request *
(*enable_metric_set)(struct i915_perf_stream *stream);
int (*enable_metric_set)(struct i915_perf_stream *stream,
struct i915_active *active);

/**
* @disable_metric_set: Remove system constraints associated with using
Expand Down

0 comments on commit d7d50f8

Please sign in to comment.