Skip to content

Commit

Permalink
Merge tag 'drm-intel-gt-next-2024-04-26' of https://anongit.freedeskt…
Browse files Browse the repository at this point in the history
…op.org/git/drm/drm-intel into drm-next

UAPI Changes:

- drm/i915/guc: Use context hints for GT frequency

    Allow user to provide a low latency context hint. When set, KMD
    sends a hint to GuC which results in special handling for this
    context. SLPC will ramp the GT frequency aggressively every time
    it switches to this context. The down freq threshold will also be
    lower so GuC will ramp down the GT freq for this context more slowly.
    We also disable waitboost for this context as that will interfere with
    the strategy.

    We need to enable the use of SLPC Compute strategy during init, but
    it will apply only to contexts that set this bit during context
    creation.

    Userland can check whether this feature is supported using a new param,
    I915_PARAM_HAS_CONTEXT_FREQ_HINT. This flag is true for all GuC submission
    enabled platforms as they use SLPC for frequency management.

    The Mesa usage model for this flag is here -
    https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint

- drm/i915/gt: Enable only one CCS for compute workload

    Enable only one CCS engine by default with all the compute slices
    allocated to it.

    While generating the list of UABI engines to be exposed to the
    user, exclude any additional CCS engines beyond the first
    instance.

    ***

    NOTE: This W/A will make all DG2 SKUs appear like single CCS SKUs by
    default to mitigate a hardware bug. All the EUs will still remain
    usable, and all the userspace drivers have been confirmed to be able
    to dynamically detect the change in number of CCS engines and adjust.

    For the smaller percent of applications that get perf benefit from
    letting the userspace driver dispatch across all 4 CCS engines we will
    be introducing a sysfs control as a later patch to choose 4 CCS each
    with 25% EUs (or 50% if 2 CCS).

    NOTE: A regression has been reported at

    https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10895

    However, Andi has been triaging the issue and we're closing in on a fix
    for the gap in the W/A implementation:

    https://lists.freedesktop.org/archives/intel-gfx/2024-April/348747.html

Driver Changes:

- Add new workarounds and fix existing ones: Wa_14018575942 (MTL),
  Wa_16019325821 (Gen12.70), Wa_14019159160 (MTL), Wa_16015675438,
  Wa_14020495402 (Gen12.70) (Tejas, John, Lucas)
- Fix UAF on destroy against retire race and remove two earlier
  partial fixes (Janusz)
- Limit the reserved VM space to only the platforms that need it (Andi)
- Reset queue_priority_hint on parking for execlist platforms (Chris)
- Fix gt reset when GuC submission is disabled (Nirmoy)
- Correct capture of EIR register on hang (John)

- Remove usage of the deprecated ida_simple_xx() API
- Refactor confusing __intel_gt_reset() (Nirmoy)
- Fix the fix for GuC reset lock confusion (John)
- Simplify/extend platform check for Wa_14018913170 (John)
- Replace dev_priv with i915 (Andi)
- Add and use gt_to_guc() wrapper (Andi)
- Remove bogus null check (Rodrigo, Dan)

- Selftest improvements (Janusz, Nirmoy, Daniele)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZitVBTvZmityDi7D@jlahtine-mobl.ger.corp.intel.com
  • Loading branch information
Dave Airlie committed Apr 30, 2024
2 parents b84bc94 + 4d3421e commit 68b89e2
Show file tree
Hide file tree
Showing 54 changed files with 414 additions and 156 deletions.
16 changes: 14 additions & 2 deletions drivers/gpu/drm/i915/gem/i915_gem_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
struct i915_gem_proto_context *pc,
struct drm_i915_gem_context_param *args)
{
struct drm_i915_private *i915 = fpriv->i915;
int ret = 0;

switch (args->param) {
Expand All @@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
break;

case I915_CONTEXT_PARAM_LOW_LATENCY:
if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY);
else
ret = -EINVAL;
break;

case I915_CONTEXT_PARAM_RECOVERABLE:
if (args->size)
ret = -EINVAL;
Expand Down Expand Up @@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce,
if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS))
ret = intel_context_reconfigure_sseu(ce, sseu);

if (test_bit(UCONTEXT_LOW_LATENCY, &ctx->user_flags))
__set_bit(CONTEXT_LOW_LATENCY, &ce->flags);

return ret;
}

Expand Down Expand Up @@ -1630,6 +1641,9 @@ i915_gem_create_context(struct drm_i915_private *i915,
if (vm)
ctx->vm = vm;

/* Assign early so intel_context_set_gem can access these flags */
ctx->user_flags = pc->user_flags;

mutex_init(&ctx->engines_mutex);
if (pc->num_user_engines >= 0) {
i915_gem_context_set_user_engines(ctx);
Expand All @@ -1652,8 +1666,6 @@ i915_gem_create_context(struct drm_i915_private *i915,
* is no remap info, it will be a NOP. */
ctx->remap_slice = ALL_L3_SLICES(i915);

ctx->user_flags = pc->user_flags;

for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;

Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/gem/i915_gem_context_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ struct i915_gem_context {
#define UCONTEXT_BANNABLE 2
#define UCONTEXT_RECOVERABLE 3
#define UCONTEXT_PERSISTENCE 4
#define UCONTEXT_LOW_LATENCY 5

/**
* @flags: small set of booleans
Expand Down
22 changes: 2 additions & 20 deletions drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,6 @@ struct i915_execbuffer {
struct intel_context *context; /* logical state for the request */
struct i915_gem_context *gem_context; /** caller's context */
intel_wakeref_t wakeref;
intel_wakeref_t wakeref_gt0;

/** our requests to build */
struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
Expand Down Expand Up @@ -2457,15 +2456,15 @@ static int eb_submit(struct i915_execbuffer *eb)
* The engine index is returned.
*/
static unsigned int
gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
gen8_dispatch_bsd_engine(struct drm_i915_private *i915,
struct drm_file *file)
{
struct drm_i915_file_private *file_priv = file->driver_priv;

/* Check whether the file_priv has already selected one ring. */
if ((int)file_priv->bsd_engine < 0)
file_priv->bsd_engine =
get_random_u32_below(dev_priv->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);
get_random_u32_below(i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);

return file_priv->bsd_engine;
}
Expand Down Expand Up @@ -2686,7 +2685,6 @@ static int
eb_select_engine(struct i915_execbuffer *eb)
{
struct intel_context *ce, *child;
struct intel_gt *gt;
unsigned int idx;
int err;

Expand All @@ -2710,17 +2708,10 @@ eb_select_engine(struct i915_execbuffer *eb)
}
}
eb->num_batches = ce->parallel.number_children + 1;
gt = ce->engine->gt;

for_each_child(ce, child)
intel_context_get(child);
eb->wakeref = intel_gt_pm_get(ce->engine->gt);
/*
* Keep GT0 active on MTL so that i915_vma_parked() doesn't
* free VMAs while execbuf ioctl is validating VMAs.
*/
if (gt->info.id)
eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915));

if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
err = intel_context_alloc_state(ce);
Expand Down Expand Up @@ -2759,9 +2750,6 @@ eb_select_engine(struct i915_execbuffer *eb)
return err;

err:
if (gt->info.id)
intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0);

intel_gt_pm_put(ce->engine->gt, eb->wakeref);
for_each_child(ce, child)
intel_context_put(child);
Expand All @@ -2775,12 +2763,6 @@ eb_put_engine(struct i915_execbuffer *eb)
struct intel_context *child;

i915_vm_put(eb->context->vm);
/*
* This works in conjunction with eb_select_engine() to prevent
* i915_vma_parked() from interfering while execbuf validates vmas.
*/
if (eb->gt->info.id)
intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0);
intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
for_each_child(eb->context, child)
intel_context_put(child);
Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/i915/gem/i915_gem_shmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ i915_gem_object_create_shmem(struct drm_i915_private *i915,

/* Allocate a new GEM object and fill it with the supplied data */
struct drm_i915_gem_object *
i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
const void *data, resource_size_t size)
{
struct drm_i915_gem_object *obj;
Expand All @@ -663,8 +663,8 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
resource_size_t offset;
int err;

GEM_WARN_ON(IS_DGFX(dev_priv));
obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
GEM_WARN_ON(IS_DGFX(i915));
obj = i915_gem_object_create_shmem(i915, round_up(size, PAGE_SIZE));
if (IS_ERR(obj))
return obj;

Expand Down
8 changes: 4 additions & 4 deletions drivers/gpu/drm/i915/gem/i915_gem_stolen.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ struct drm_i915_gem_object;

#define i915_stolen_fb drm_mm_node

int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
int i915_gem_stolen_insert_node(struct drm_i915_private *i915,
struct drm_mm_node *node, u64 size,
unsigned alignment);
int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915,
struct drm_mm_node *node, u64 size,
unsigned alignment, u64 start,
u64 end);
void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
struct drm_mm_node *node);
struct intel_memory_region *
i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
Expand All @@ -31,7 +31,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
u16 instance);

struct drm_i915_gem_object *
i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
i915_gem_object_create_stolen(struct drm_i915_private *i915,
resource_size_t size);

bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj);
Expand Down
18 changes: 9 additions & 9 deletions drivers/gpu/drm/i915/gem/i915_gem_tiling.c
Original file line number Diff line number Diff line change
Expand Up @@ -343,12 +343,12 @@ int
i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_set_tiling *args = data;
struct drm_i915_gem_object *obj;
int err;

if (!to_gt(dev_priv)->ggtt->num_fences)
if (!to_gt(i915)->ggtt->num_fences)
return -EOPNOTSUPP;

obj = i915_gem_object_lookup(file, args->handle);
Expand All @@ -374,9 +374,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
args->stride = 0;
} else {
if (args->tiling_mode == I915_TILING_X)
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_x;
else
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_y;

/* Hide bit 17 swizzling from the user. This prevents old Mesa
* from aborting the application on sw fallbacks to bit 17,
Expand Down Expand Up @@ -427,11 +427,11 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
struct drm_i915_gem_get_tiling *args = data;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_object *obj;
int err = -ENOENT;

if (!to_gt(dev_priv)->ggtt->num_fences)
if (!to_gt(i915)->ggtt->num_fences)
return -EOPNOTSUPP;

rcu_read_lock();
Expand All @@ -447,10 +447,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,

switch (args->tiling_mode) {
case I915_TILING_X:
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_x;
break;
case I915_TILING_Y:
args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
args->swizzle_mode = to_gt(i915)->ggtt->bit_6_swizzle_y;
break;
default:
case I915_TILING_NONE:
Expand All @@ -459,7 +459,7 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
}

/* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
if (dev_priv->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES)
if (i915->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES)
args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN;
else
args->phys_swizzle_mode = args->swizzle_mode;
Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/i915/gem/i915_gem_userptr.c
Original file line number Diff line number Diff line change
Expand Up @@ -463,13 +463,13 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
struct drm_file *file)
{
static struct lock_class_key __maybe_unused lock_class;
struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_userptr *args = data;
struct drm_i915_gem_object __maybe_unused *obj;
int __maybe_unused ret;
u32 __maybe_unused handle;

if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) {
/* We cannot support coherent userptr objects on hw without
* LLC and broken snooping.
*/
Expand Down Expand Up @@ -501,7 +501,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
* On almost all of the older hw, we cannot tell the GPU that
* a page is readonly.
*/
if (!to_gt(dev_priv)->vm->has_read_only)
if (!to_gt(i915)->vm->has_read_only)
return -ENODEV;
}

Expand Down
14 changes: 7 additions & 7 deletions drivers/gpu/drm/i915/gem/selftests/huge_pages.c
Original file line number Diff line number Diff line change
Expand Up @@ -1969,19 +1969,19 @@ int i915_gem_huge_page_mock_selftests(void)
SUBTEST(igt_mock_memory_region_huge_pages),
SUBTEST(igt_mock_ppgtt_misaligned_dma),
};
struct drm_i915_private *dev_priv;
struct drm_i915_private *i915;
struct i915_ppgtt *ppgtt;
int err;

dev_priv = mock_gem_device();
if (!dev_priv)
i915 = mock_gem_device();
if (!i915)
return -ENOMEM;

/* Pretend to be a device which supports the 48b PPGTT */
RUNTIME_INFO(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
RUNTIME_INFO(dev_priv)->ppgtt_size = 48;
RUNTIME_INFO(i915)->ppgtt_type = INTEL_PPGTT_FULL;
RUNTIME_INFO(i915)->ppgtt_size = 48;

ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
ppgtt = i915_ppgtt_create(to_gt(i915), 0);
if (IS_ERR(ppgtt)) {
err = PTR_ERR(ppgtt);
goto out_unlock;
Expand All @@ -2005,7 +2005,7 @@ int i915_gem_huge_page_mock_selftests(void)
out_put:
i915_vm_put(&ppgtt->vm);
out_unlock:
mock_destroy_device(dev_priv);
mock_destroy_device(i915);
return err;
}

Expand Down
5 changes: 4 additions & 1 deletion drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "i915_drv.h"
#include "i915_selftest.h"
#include "gem/i915_gem_context.h"
#include "gt/intel_gt.h"

#include "mock_context.h"
#include "mock_dmabuf.h"
Expand Down Expand Up @@ -155,6 +156,7 @@ static int verify_access(struct drm_i915_private *i915,
struct file *file;
u32 *vaddr;
int err = 0, i;
unsigned int mode;

file = mock_file(i915);
if (IS_ERR(file))
Expand Down Expand Up @@ -194,7 +196,8 @@ static int verify_access(struct drm_i915_private *i915,
if (err)
goto out_file;

vaddr = i915_gem_object_pin_map_unlocked(native_obj, I915_MAP_WB);
mode = intel_gt_coherent_map_type(to_gt(i915), native_obj, true);
vaddr = i915_gem_object_pin_map_unlocked(native_obj, mode);
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto out_file;
Expand Down
22 changes: 14 additions & 8 deletions drivers/gpu/drm/i915/gt/gen8_engine_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -740,21 +740,25 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
}

/* Wa_14014475959:dg2 */
#define CCS_SEMAPHORE_PPHWSP_OFFSET 0x540
static u32 ccs_semaphore_offset(struct i915_request *rq)
/* Wa_16019325821 */
/* Wa_14019159160 */
#define HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET 0x540
static u32 hold_switchout_semaphore_offset(struct i915_request *rq)
{
return i915_ggtt_offset(rq->context->state) +
(LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
(LRC_PPHWSP_PN * PAGE_SIZE) + HOLD_SWITCHOUT_SEMAPHORE_PPHWSP_OFFSET;
}

/* Wa_14014475959:dg2 */
static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
/* Wa_16019325821 */
/* Wa_14019159160 */
static u32 *hold_switchout_emit_wa_busywait(struct i915_request *rq, u32 *cs)
{
int i;

*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
MI_ATOMIC_MOVE;
*cs++ = ccs_semaphore_offset(rq);
*cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;
*cs++ = 1;

Expand All @@ -770,7 +774,7 @@ static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_EQ_SDD;
*cs++ = 0;
*cs++ = ccs_semaphore_offset(rq);
*cs++ = hold_switchout_semaphore_offset(rq);
*cs++ = 0;

return cs;
Expand All @@ -787,8 +791,10 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
cs = gen12_emit_preempt_busywait(rq, cs);

/* Wa_14014475959:dg2 */
if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
cs = ccs_emit_wa_busywait(rq, cs);
/* Wa_16019325821 */
/* Wa_14019159160 */
if (intel_engine_uses_wa_hold_switchout(rq->engine))
cs = hold_switchout_emit_wa_busywait(rq, cs);

rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/gt/intel_context_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ struct intel_context {
#define CONTEXT_PERMA_PIN 11
#define CONTEXT_IS_PARKING 12
#define CONTEXT_EXITING 13
#define CONTEXT_LOW_LATENCY 14

struct {
u64 timeout_us;
Expand Down
Loading

0 comments on commit 68b89e2

Please sign in to comment.