Skip to content

Commit

Permalink
drm/i915: Store the default sseu setup on the engine
Browse files Browse the repository at this point in the history
As we push for better compartmentalisation, it is more convenient to
copy the default sseu configuration from the engine into the derived
logical context, than it is to dig it out from i915->runtime_info.

v2: Use intel_sseu_from_device_info() to describe the converter

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190424095134.30249-1-chris@chris-wilson.co.uk
  • Loading branch information
Chris Wilson committed Apr 24, 2019
1 parent 51eb1a1 commit 0940757
Show file tree
Hide file tree
Showing 15 changed files with 226 additions and 194 deletions.
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ i915-y += \
intel_lrc.o \
intel_mocs.o \
intel_ringbuffer.o \
intel_sseu.o \
intel_uncore.o \
intel_wopcm.o

Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/Makefile.header-test
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ header_test := \
intel_psr.h \
intel_sdvo.h \
intel_sprite.h \
intel_sseu.h \
intel_tv.h \
intel_workarounds_types.h

Expand Down
14 changes: 0 additions & 14 deletions drivers/gpu/drm/i915/i915_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -3390,20 +3390,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
return (struct intel_device_info *)INTEL_INFO(dev_priv);
}

static inline struct intel_sseu
intel_device_default_sseu(struct drm_i915_private *i915)
{
const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
struct intel_sseu value = {
.slice_mask = sseu->slice_mask,
.subslice_mask = sseu->subslice_mask[0],
.min_eus_per_subslice = sseu->max_eus_per_subslice,
.max_eus_per_subslice = sseu->max_eus_per_subslice,
};

return value;
}

/* modesetting */
extern void intel_modeset_init_hw(struct drm_device *dev);
extern int intel_modeset_init(struct drm_device *dev);
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/i915_gem_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -1156,7 +1156,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
*cs++ = gen8_make_rpcs(rq->i915, &sseu);
*cs++ = intel_sseu_make_rpcs(rq->i915, &sseu);

intel_ring_advance(rq, cs);

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/i915_perf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1679,7 +1679,7 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,

CTX_REG(reg_state,
CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
gen8_make_rpcs(i915, &ce->sseu));
intel_sseu_make_rpcs(i915, &ce->sseu));
}

/*
Expand Down
4 changes: 1 addition & 3 deletions drivers/gpu/drm/i915/intel_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,15 +230,13 @@ intel_context_init(struct intel_context *ce,
ce->gem_context = ctx;
ce->engine = engine;
ce->ops = engine->cops;
ce->sseu = engine->sseu;

INIT_LIST_HEAD(&ce->signal_link);
INIT_LIST_HEAD(&ce->signals);

mutex_init(&ce->pin_mutex);

/* Use the whole device by default */
ce->sseu = intel_device_default_sseu(ctx->i915);

i915_active_request_init(&ce->active_tracker,
NULL, intel_context_retire);
}
Expand Down
11 changes: 1 addition & 10 deletions drivers/gpu/drm/i915/intel_context_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <linux/types.h>

#include "i915_active_types.h"
#include "intel_sseu.h"

struct i915_gem_context;
struct i915_vma;
Expand All @@ -28,16 +29,6 @@ struct intel_context_ops {
void (*destroy)(struct kref *kref);
};

/*
 * Powergating configuration for a particular (context,engine).
 *
 * When the RPCS value is built, only the number of bits set in each mask
 * is used (slice count, subslice count); the EU limits are written into
 * the EU min/max fields as-is.
 */
struct intel_sseu {
/* Bitmask of slices to enable. */
u8 slice_mask;
/* Bitmask of subslices to enable (a single mask, not one per slice). */
u8 subslice_mask;
/* Lower bound on EUs per subslice. */
u8 min_eus_per_subslice;
/* Upper bound on EUs per subslice. */
u8 max_eus_per_subslice;
};

struct intel_context {
struct kref ref;

Expand Down
28 changes: 1 addition & 27 deletions drivers/gpu/drm/i915/intel_device_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

#include "intel_engine_types.h"
#include "intel_display.h"
#include "intel_sseu.h"

struct drm_printer;
struct drm_i915_private;
Expand Down Expand Up @@ -140,33 +141,6 @@ enum intel_ppgtt_type {
func(overlay_needs_physical); \
func(supports_tv);

/* Upper bounds used to size the per-slice / per-subslice arrays below. */
#define GEN_MAX_SLICES (6) /* CNL upper bound */
#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */

/*
 * Slice/subslice/EU (sseu) description of the device, kept in the device's
 * runtime info.
 */
struct sseu_dev_info {
/* Bitmask of slices. */
u8 slice_mask;
/* One subslice bitmask per entry; presumably indexed by slice - TODO confirm. */
u8 subslice_mask[GEN_MAX_SLICES];
u16 eu_total;
u8 eu_per_subslice;
u8 min_eu_in_pool;
/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
u8 subslice_7eu[3];
/*
 * Powergating capabilities: each flag gates whether the corresponding
 * slice count, subslice count or EU min/max field is programmed into RPCS.
 */
u8 has_slice_pg:1;
u8 has_subslice_pg:1;
u8 has_eu_pg:1;

/* Topology fields */
u8 max_slices;
u8 max_subslices;
u8 max_eus_per_subslice;

/* We don't have more than 8 eus per subslice at the moment and as we
 * store eus enabled using bits, no need to multiply by eus per
 * subslice.
 */
u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
};

struct intel_device_info {
u16 gen_mask;

Expand Down
4 changes: 4 additions & 0 deletions drivers/gpu/drm/i915/intel_engine_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,10 @@ int intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_batch_pool(engine);
intel_engine_init_cmd_parser(engine);

/* Use the whole device by default */
engine->sseu =
intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);

return 0;

err_hwsp:
Expand Down
3 changes: 3 additions & 0 deletions drivers/gpu/drm/i915/intel_engine_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "i915_priolist_types.h"
#include "i915_selftest.h"
#include "i915_timeline_types.h"
#include "intel_sseu.h"
#include "intel_workarounds_types.h"

#include "i915_gem_batch_pool.h"
Expand Down Expand Up @@ -278,6 +279,8 @@ struct intel_engine_cs {
u32 context_size;
u32 mmio_base;

struct intel_sseu sseu;

struct intel_ring *buffer;

struct i915_timeline timeline;
Expand Down
134 changes: 1 addition & 133 deletions drivers/gpu/drm/i915/intel_lrc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1232,7 +1232,7 @@ __execlists_update_reg_state(struct intel_context *ce,
/* RPCS */
if (engine->class == RENDER_CLASS)
regs[CTX_R_PWR_CLK_STATE + 1] =
gen8_make_rpcs(engine->i915, &ce->sseu);
intel_sseu_make_rpcs(engine->i915, &ce->sseu);
}

static int
Expand Down Expand Up @@ -2551,138 +2551,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine)
return logical_ring_init(engine);
}

/*
 * gen8_make_rpcs - compute the R_PWR_CLK_STATE (RPCS) value for a request
 * @i915: device whose runtime sseu info and powergating capabilities apply
 * @req_sseu: requested slice/subslice/EU configuration (not modified)
 *
 * Translates the requested powergating configuration into the register
 * encoding, honouring the device's has_slice_pg / has_subslice_pg /
 * has_eu_pg capabilities. While an exclusive i915/perf OA stream is active
 * the request is ignored in favour of a stable device-derived configuration.
 *
 * Returns: the RPCS value; 0 before Gen9, where no explicit request is
 * needed.
 */
u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
{
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	struct intel_sseu ctx_sseu;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (INTEL_GEN(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system.
	 *
	 * We could choose full enablement, but on ICL we know there are use
	 * cases which disable slices for functional, as well as performance,
	 * reasons. So in this case we select a known stable subset.
	 */
	if (!i915->perf.oa.exclusive_stream) {
		ctx_sseu = *req_sseu;
	} else {
		ctx_sseu = intel_device_default_sseu(i915);

		if (IS_GEN(i915, 11)) {
			/*
			 * We only need the subslice count so it doesn't matter
			 * which ones we select - just pick the low bits, as
			 * many as half of all available subslices per slice.
			 *
			 * The shift is done on an unsigned constant:
			 * left-shifting a negative signed value (~0) is
			 * undefined behaviour in C.
			 */
			ctx_sseu.subslice_mask =
				~(~0u << (hweight8(ctx_sseu.subslice_mask) / 2));
			ctx_sseu.slice_mask = 0x1;
		}
	}

	slices = hweight8(ctx_sseu.slice_mask);
	subslices = hweight8(ctx_sseu.subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits, maximum documented
	 * value for it is four. From this some rules/restrictions follow:
	 *
	 * 1.
	 * If enabled subslice count is greater than four, two whole slices must
	 * be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a count of subslices between the SScount maximum of four restriction,
	 * and the maximum available number on a particular SKU. Either all
	 * subslices are enabled, or a count between one and four on the first
	 * slice.
	 */
	if (IS_GEN(i915, 11) &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		/* Rule 1 above: request two slices, drop the subslice count. */
		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		/* The slice count field moved on Gen11+. */
		if (INTEL_GEN(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}

static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
{
u32 indirect_ctx_offset;
Expand Down
2 changes: 0 additions & 2 deletions drivers/gpu/drm/i915/intel_lrc.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,4 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
const char *prefix),
unsigned int max);

u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);

#endif /* _INTEL_LRC_H_ */
Loading

0 comments on commit 0940757

Please sign in to comment.