From 4cfca03f76413db115c3cc18f4370debb1b81b2b Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 26 Apr 2024 02:07:23 +0200 Subject: [PATCH 01/15] drm/i915/gt: Automate CCS Mode setting during engine resets We missed setting the CCS mode during resume and engine resets. Create a workaround to be added in the engine's workaround list. This workaround sets the XEHP_CCS_MODE value at every reset. The issue can be reproduced by running: $ clpeak --kernel-latency Without resetting the CCS mode, we encounter a fence timeout: Fence expiration time out i915-0000:03:00.0:clpeak[2387]:2! Fixes: 2bebae0112b1 ("drm/i915/gt: Enable only one CCS for compute workload") Reported-by: Gnattu OC Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10895 Signed-off-by: Andi Shyti Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Matt Roper Cc: # v6.2+ Tested-by: Gnattu OC Reviewed-by: Rodrigo Vivi Tested-by: Krzysztof Gibala Link: https://patchwork.freedesktop.org/patch/msgid/20240426000723.229296-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 2 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c index 044219c5960a5..99b71bb7da0a6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c @@ -8,14 +8,14 @@ #include "intel_gt_ccs_mode.h" #include "intel_gt_regs.h" -void intel_gt_apply_ccs_mode(struct intel_gt *gt) +unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt) { int cslice; u32 mode = 0; int first_ccs = __ffs(CCS_MASK(gt)); if (!IS_DG2(gt->i915)) - return; + return 0; /* Build the value for the fixed CCS load balancing */ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) { @@ -35,5 +35,5 @@ void intel_gt_apply_ccs_mode(struct intel_gt *gt) XEHP_CCS_MODE_CSLICE_MASK); } - intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode); + return mode; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h index 9e5549caeb269..55547f2ff426a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h @@ -8,6 +8,6 @@ struct intel_gt; -void intel_gt_apply_ccs_mode(struct intel_gt *gt); +unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt); #endif /* __INTEL_GT_CCS_MODE_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 71dc6f10a0371..6d5efd46a9875 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2858,6 +2858,7 @@ add_render_compute_tuning_settings(struct intel_gt *gt, static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct intel_gt *gt = engine->gt; + u32 mode; if (!IS_DG2(gt->i915)) return; @@ -2874,7 +2875,8 @@ static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_li * After having disabled automatic load balancing we need to * assign all slices to a single CCS. We will call it CCS mode 1 */ - intel_gt_apply_ccs_mode(gt); + mode = intel_gt_apply_ccs_mode(gt); + wa_masked_en(wal, XEHP_CCS_MODE, mode); } /* From 749670a58d935303ad1ce529acc73f12de25832e Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 6 May 2024 20:02:50 +0200 Subject: [PATCH 02/15] Revert "drm/i915: Remove extra multi-gt pm-references" This reverts commit 1f33dc0c1189efb9ae19c6fc22b64dd3e26261fb. There was a patch supposed to fix an issue of illegal attempts to free a still active i915 VMA object when parking a GT believed to be idle, reported by CI on 2-GT Meteor Lake. As a solution, an extra wakeref for a Primary GT was acquired from i915_gem_do_execbuffer() -- see commit f56fe3e91787 ("drm/i915: Fix a VMA UAF for multi-gt platform"). However, that fix occurred insufficient -- the issue was still reported by CI. That wakeref was released on exit from i915_gem_do_execbuffer(), then potentially before completion of the request and deactivation of its associated VMAs. Moreover, CI reports indicated that single-GT platforms also suffered sporadically from the same race. Since that issue was fixed by another commit f3c71b2ded5c ("drm/i915/vma: Fix UAF on destroy against retire race"), the changes introduced by that insufficient fix were dropped as no longer useful. However, that series resulted in another VMA UAF scenario now being triggered in CI. <4> [260.290809] ------------[ cut here ]------------ <4> [260.290988] list_del corruption. prev->next should be ffff888118c5d990, but was ffff888118c5a510. (prev=ffff888118c5a510) <4> [260.291004] WARNING: CPU: 2 PID: 1143 at lib/list_debug.c:62 __list_del_entry_valid_or_report+0xb7/0xe0 .. <4> [260.291055] CPU: 2 PID: 1143 Comm: kms_plane Not tainted 6.9.0-rc2-CI_DRM_14524-ga25d180c6853+ #1 <4> [260.291058] Hardware name: Intel Corporation Meteor Lake Client Platform/MTL-P LP5x T3 RVP, BIOS MTLPFWI1.R00.3471.D91.2401310918 01/31/2024 <4> [260.291060] RIP: 0010:__list_del_entry_valid_or_report+0xb7/0xe0 ... <4> [260.291087] Call Trace: <4> [260.291089] <4> [260.291124] i915_vma_reopen+0x43/0x80 [i915] <4> [260.291298] eb_lookup_vmas+0x9cb/0xcc0 [i915] <4> [260.291579] i915_gem_do_execbuffer+0xc9a/0x26d0 [i915] <4> [260.291883] i915_gem_execbuffer2_ioctl+0x123/0x2a0 [i915] ... <4> [260.292301] ... <4> [260.292506] ---[ end trace 0000000000000000 ]--- <4> [260.292782] general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6ca3: 0000 [#1] PREEMPT SMP NOPTI <4> [260.303575] CPU: 2 PID: 1143 Comm: kms_plane Tainted: G W 6.9.0-rc2-CI_DRM_14524-ga25d180c6853+ #1 <4> [260.313851] Hardware name: Intel Corporation Meteor Lake Client Platform/MTL-P LP5x T3 RVP, BIOS MTLPFWI1.R00.3471.D91.2401310918 01/31/2024 <4> [260.326359] RIP: 0010:eb_validate_vmas+0x114/0xd80 [i915] ... <4> [260.428756] Call Trace: <4> [260.431192] <4> [639.283393] i915_gem_do_execbuffer+0xd05/0x26d0 [i915] <4> [639.305245] i915_gem_execbuffer2_ioctl+0x123/0x2a0 [i915] ... <4> [639.411134] ... <4> [639.449979] ---[ end trace 0000000000000000 ]--- We defer actually closing, unbinding and destroying a VMA until next idle point, or until the object is freed in the meantime. By postponing the unbind, we allow for the VMA to be reopened by the client, avoiding the work required to rebind the VMA. Starting from commit b0647a5e79b1 ("drm/i915: Avoid live-lock with i915_vma_parked()"), we assume that as long as a GT is held idle, no VMA would be reopened while we destroy them. That assumption is no longer true in multi-GT configurations, where a VMA we reopen may be handled by a GT different from the one that we already keep active via its engine while we set up an execbuf request. Restoring the extra GT0 PM wakeref removed from i915_gem_do_execbuffer() processing path seems to fix this issue. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10608 Signed-off-by: Janusz Krzysztofik Cc: Rodrigo Vivi Cc: Nirmoy Das Reviewed-by: Nirmoy Das Fixes: 1f33dc0c1189 ("drm/i915: Remove extra multi-gt pm-references") Link: https://patchwork.freedesktop.org/patch/msgid/20240506180253.96858-2-janusz.krzysztofik@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 949265131514d..5be18c2e8bfd3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -254,6 +254,7 @@ struct i915_execbuffer { struct intel_context *context; /* logical state for the request */ struct i915_gem_context *gem_context; /** caller's context */ intel_wakeref_t wakeref; + intel_wakeref_t wakeref_gt0; /** our requests to build */ struct i915_request *requests[MAX_ENGINE_INSTANCE + 1]; @@ -2684,6 +2685,7 @@ static int eb_select_engine(struct i915_execbuffer *eb) { struct intel_context *ce, *child; + struct intel_gt *gt; unsigned int idx; int err; @@ -2707,10 +2709,17 @@ eb_select_engine(struct i915_execbuffer *eb) } } eb->num_batches = ce->parallel.number_children + 1; + gt = ce->engine->gt; for_each_child(ce, child) intel_context_get(child); eb->wakeref = intel_gt_pm_get(ce->engine->gt); + /* + * Keep GT0 active on MTL so that i915_vma_parked() doesn't + * free VMAs while execbuf ioctl is validating VMAs. + */ + if (gt->info.id) + eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915)); if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { err = intel_context_alloc_state(ce); @@ -2749,6 +2758,9 @@ eb_select_engine(struct i915_execbuffer *eb) return err; err: + if (gt->info.id) + intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0); + intel_gt_pm_put(ce->engine->gt, eb->wakeref); for_each_child(ce, child) intel_context_put(child); @@ -2762,6 +2774,12 @@ eb_put_engine(struct i915_execbuffer *eb) struct intel_context *child; i915_vm_put(eb->context->vm); + /* + * This works in conjunction with eb_select_engine() to prevent + * i915_vma_parked() from interfering while execbuf validates vmas. + */ + if (eb->gt->info.id) + intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0); intel_gt_pm_put(eb->context->engine->gt, eb->wakeref); for_each_child(eb->context, child) intel_context_put(child); From 8b69ac66d68907deb473b310b4f2c2a6b26ef191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 May 2024 15:14:21 +0300 Subject: [PATCH 03/15] drm/i915: Fix HAS_REGION() usage in intel_gt_probe_lmem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HAS_REGION() takes a bitmask, not the region ID. This causes the GEM_BUG_ON() to assert that the SMEM region is available rather than the intended LMEM region. No real harm since SMEM is always available, but also not checking what was intended. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240502121423.1002-1-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 45920cda0cc77..96b444905d8ce 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -105,7 +105,7 @@ static int intel_gt_probe_lmem(struct intel_gt *gt) intel_memory_region_set_name(mem, "local%u", mem->instance); - GEM_BUG_ON(!HAS_REGION(i915, id)); + GEM_BUG_ON(!HAS_REGION(i915, BIT(id))); GEM_BUG_ON(i915->mm.regions[id]); i915->mm.regions[id] = mem; From d082c05a635e59764c5337a5da7f4287b8a80476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 May 2024 15:14:22 +0300 Subject: [PATCH 04/15] drm/i915: Pass the region ID rather than a bitmask to HAS_REGION() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The name 'HAS_REGION()' suggests we are checking for a single region, so seem more sensible to pass in the region ID rather than a bitmask. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240502121423.1002-2-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/intel_memory_region.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 96b444905d8ce..45920cda0cc77 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -105,7 +105,7 @@ static int intel_gt_probe_lmem(struct intel_gt *gt) intel_memory_region_set_name(mem, "local%u", mem->instance); - GEM_BUG_ON(!HAS_REGION(i915, BIT(id))); + GEM_BUG_ON(!HAS_REGION(i915, id)); GEM_BUG_ON(i915->mm.regions[id]); i915->mm.regions[id] = mem; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a72cecd2dbc74..1ddd9c8a48813 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -757,8 +757,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, */ #define HAS_64K_PAGES(i915) (INTEL_INFO(i915)->has_64k_pages) -#define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) -#define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM) +#define HAS_REGION(i915, id) (INTEL_INFO(i915)->memory_regions & BIT(id)) +#define HAS_LMEM(i915) HAS_REGION(i915, INTEL_REGION_LMEM_0) #define HAS_EXTRA_GT_LIST(i915) (INTEL_INFO(i915)->extra_gt_list) diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 60a03340bbd43..3a64ba4fa299d 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -333,7 +333,7 @@ int intel_memory_regions_hw_probe(struct drm_i915_private *i915) struct intel_memory_region *mem = ERR_PTR(-ENODEV); u16 type, instance; - if (!HAS_REGION(i915, BIT(i))) + if (!HAS_REGION(i915, i)) continue; type = intel_region_map[i].class; From 3797783b1c966885e8bae4a7e6b99ca2f0fae4f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 May 2024 15:14:23 +0300 Subject: [PATCH 05/15] drm/i915: Remove counter productive REGION_* wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This extra macro level between the region IDs and their bitmasks just makes it harder to see what is used where. Get rid of the wrappers. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240502121423.1002-3-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_pci.c | 6 +++--- drivers/gpu/drm/i915/intel_memory_region.h | 5 ----- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 8b4fdeabb12a5..37a1d530a59c4 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -81,7 +81,7 @@ .__runtime.page_sizes = I915_GTT_PAGE_SIZE_4K #define GEN_DEFAULT_REGIONS \ - .memory_regions = REGION_SMEM | REGION_STOLEN_SMEM + .memory_regions = BIT(INTEL_REGION_SMEM) | BIT(INTEL_REGION_STOLEN_SMEM) #define I830_FEATURES \ GEN(2), \ @@ -660,7 +660,7 @@ static const struct intel_device_info rkl_info = { }; #define DGFX_FEATURES \ - .memory_regions = REGION_SMEM | REGION_LMEM | REGION_STOLEN_LMEM, \ + .memory_regions = BIT(INTEL_REGION_SMEM) | BIT(INTEL_REGION_LMEM_0) | BIT(INTEL_REGION_STOLEN_LMEM), \ .has_llc = 0, \ .has_pxp = 0, \ .has_snoop = 1, \ @@ -835,7 +835,7 @@ static const struct intel_device_info mtl_info = { .has_snoop = 1, .max_pat_index = 4, .has_pxp = 1, - .memory_regions = REGION_SMEM | REGION_STOLEN_LMEM, + .memory_regions = BIT(INTEL_REGION_SMEM) | BIT(INTEL_REGION_STOLEN_LMEM), .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(CCS0), MTL_CACHELEVEL, }; diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 9ba36454e51b0..72496e401016b 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -38,11 +38,6 @@ enum intel_region_id { INTEL_REGION_UNKNOWN, /* Should be last */ }; -#define REGION_SMEM BIT(INTEL_REGION_SMEM) -#define REGION_LMEM BIT(INTEL_REGION_LMEM_0) -#define REGION_STOLEN_SMEM BIT(INTEL_REGION_STOLEN_SMEM) -#define REGION_STOLEN_LMEM BIT(INTEL_REGION_STOLEN_LMEM) - #define I915_ALLOC_CONTIGUOUS BIT(0) #define for_each_memory_region(mr, i915, id) \ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index af349fd9abc2a..0bd29846873b3 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -122,7 +122,7 @@ static const struct intel_device_info mock_info = { .__runtime.page_sizes = (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_2M), - .memory_regions = REGION_SMEM, + .memory_regions = BIT(INTEL_REGION_SMEM), .platform_engine_mask = BIT(0), /* simply use legacy cache level for mock device */ From a3598d7d9ae9b4c9ded0f052d96ca5800758d949 Mon Sep 17 00:00:00 2001 From: Deming Wang Date: Mon, 13 May 2024 02:14:51 -0400 Subject: [PATCH 06/15] drm/i915/gem/i915_gem_ttm_move: Fix typo The mapings should be replaced by mappings. Reviewed-by: Rodrigo Vivi Signed-off-by: Deming Wang Link: https://patchwork.freedesktop.org/patch/msgid/20240513061451.1627-1-wangdeming@inspur.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 7078af2f8f795..03b00a03a634d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -155,7 +155,7 @@ void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj) * @bo: The ttm buffer object. * * This function prepares an object for move by removing all GPU bindings, - * removing all CPU mapings and finally releasing the pages sg-table. + * removing all CPU mappings and finally releasing the pages sg-table. * * Return: 0 if successful, negative error code on error. */ From fbad43eccae5cb14594195c20113369aabaa22b5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 23 Apr 2024 18:23:10 +0200 Subject: [PATCH 07/15] drm/i915/gt: Disarm breadcrumbs if engines are already idle The breadcrumbs use a GT wakeref for guarding the interrupt, but are disarmed during release of the engine wakeref. This leaves a hole where we may attach a breadcrumb just as the engine is parking (after it has parked its breadcrumbs), execute the irq worker with some signalers still attached, but never be woken again. That issue manifests itself in CI with IGT runner timeouts while tests are waiting indefinitely for release of all GT wakerefs. <6> [209.151778] i915: Running live_engine_pm_selftests/live_engine_busy_stats <7> [209.231628] i915 0000:00:02.0: [drm:intel_power_well_disable [i915]] disabling PW_5 <7> [209.231816] i915 0000:00:02.0: [drm:intel_power_well_disable [i915]] disabling PW_4 <7> [209.231944] i915 0000:00:02.0: [drm:intel_power_well_disable [i915]] disabling PW_3 <7> [209.232056] i915 0000:00:02.0: [drm:intel_power_well_disable [i915]] disabling PW_2 <7> [209.232166] i915 0000:00:02.0: [drm:intel_power_well_disable [i915]] disabling DC_off <7> [209.232270] i915 0000:00:02.0: [drm:skl_enable_dc6 [i915]] Enabling DC6 <7> [209.232368] i915 0000:00:02.0: [drm:gen9_set_dc_state.part.0 [i915]] Setting DC state from 00 to 02 <4> [299.356116] [IGT] Inactivity timeout exceeded. Killing the current test with SIGQUIT. ... <6> [299.356526] sysrq: Show State ... <6> [299.373964] task:i915_selftest state:D stack:11784 pid:5578 tgid:5578 ppid:873 flags:0x00004002 <6> [299.373967] Call Trace: <6> [299.373968] <6> [299.373970] __schedule+0x3bb/0xda0 <6> [299.373974] schedule+0x41/0x110 <6> [299.373976] intel_wakeref_wait_for_idle+0x82/0x100 [i915] <6> [299.374083] ? __pfx_var_wake_function+0x10/0x10 <6> [299.374087] live_engine_busy_stats+0x9b/0x500 [i915] <6> [299.374173] __i915_subtests+0xbe/0x240 [i915] <6> [299.374277] ? __pfx___intel_gt_live_setup+0x10/0x10 [i915] <6> [299.374369] ? __pfx___intel_gt_live_teardown+0x10/0x10 [i915] <6> [299.374456] intel_engine_live_selftests+0x1c/0x30 [i915] <6> [299.374547] __run_selftests+0xbb/0x190 [i915] <6> [299.374635] i915_live_selftests+0x4b/0x90 [i915] <6> [299.374717] i915_pci_probe+0x10d/0x210 [i915] At the end of the interrupt worker, if there are no more engines awake, disarm the breadcrumb and go to sleep. Fixes: 9d5612ca165a ("drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission") Closes: https://gitlab.freedesktop.org/drm/intel/issues/10026 Signed-off-by: Chris Wilson Cc: Andrzej Hajda Cc: # v5.12+ Signed-off-by: Janusz Krzysztofik Acked-by: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240423165505.465734-2-janusz.krzysztofik@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index d650beb8ed22f..20b9b04ec1e0b 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -263,8 +263,13 @@ static void signal_irq_work(struct irq_work *work) i915_request_put(rq); } + /* Lazy irq enabling after HW submission */ if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers)) intel_breadcrumbs_arm_irq(b); + + /* And confirm that we still want irqs enabled before we yield */ + if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) + intel_breadcrumbs_disarm_irq(b); } struct intel_breadcrumbs * @@ -315,13 +320,7 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) return; /* Kick the work once more to drain the signalers, and disarm the irq */ - irq_work_sync(&b->irq_work); - while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { - local_irq_disable(); - signal_irq_work(&b->irq_work); - local_irq_enable(); - cond_resched(); - } + irq_work_queue(&b->irq_work); } void intel_breadcrumbs_free(struct kref *kref) @@ -404,7 +403,7 @@ static void insert_breadcrumb(struct i915_request *rq) * the request as it may have completed and raised the interrupt as * we were attaching it into the lists. */ - if (!b->irq_armed || __i915_request_is_complete(rq)) + if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq)) irq_work_queue(&b->irq_work); } From e1eb97c211859ed4a86f63be24c6723316450bcf Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 May 2024 15:59:38 +0100 Subject: [PATCH 08/15] drm/i915: Shadow default engine context image in the context To enable adding override of the default engine context image let us start shadowing the per engine state in the context. Signed-off-by: Tvrtko Ursulin Cc: Lionel Landwerlin Cc: Carlos Santa Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Signed-off-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240514145939.87427-1-tursulin@igalia.com --- drivers/gpu/drm/i915/gt/intel_context_types.h | 2 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 7 ++++--- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 7 ++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index ed95a7b57cbba..6ae8abfeccdb6 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -99,6 +99,8 @@ struct intel_context { struct i915_address_space *vm; struct i915_gem_context __rcu *gem_context; + struct file *default_state; + /* * @signal_lock protects the list of requests that need signaling, * @signals. While there are any requests that need signaling, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b387146ede986..d4ffb352403c1 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1017,9 +1017,8 @@ void lrc_init_state(struct intel_context *ce, set_redzone(state, engine); - if (engine->default_state) { - shmem_read(engine->default_state, 0, - state, engine->context_size); + if (ce->default_state) { + shmem_read(ce->default_state, 0, state, engine->context_size); __set_bit(CONTEXT_VALID_BIT, &ce->flags); inhibit = false; } @@ -1131,6 +1130,8 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) GEM_BUG_ON(ce->state); + ce->default_state = engine->default_state; + vma = __lrc_alloc_state(ce, engine); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 92085ffd23de0..8625e88e785ff 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -474,8 +474,7 @@ static int ring_context_init_default_state(struct intel_context *ce, if (IS_ERR(vaddr)) return PTR_ERR(vaddr); - shmem_read(ce->engine->default_state, 0, - vaddr, ce->engine->context_size); + shmem_read(ce->default_state, 0, vaddr, ce->engine->context_size); i915_gem_object_flush_map(obj); __i915_gem_object_release_map(obj); @@ -491,7 +490,7 @@ static int ring_context_pre_pin(struct intel_context *ce, struct i915_address_space *vm; int err = 0; - if (ce->engine->default_state && + if (ce->default_state && !test_bit(CONTEXT_VALID_BIT, &ce->flags)) { err = ring_context_init_default_state(ce, ww); if (err) @@ -570,6 +569,8 @@ static int ring_context_alloc(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; + ce->default_state = engine->default_state; + /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); ce->ring = engine->legacy.ring; From 0f1bb41bf39695c84c83ce6f69e125b562d1d7ab Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 May 2024 15:59:39 +0100 Subject: [PATCH 09/15] drm/i915: Support replaying GPU hangs with captured context image When debugging GPU hangs Mesa developers are finding it useful to replay the captured error state against the simulator. But due various simulator limitations which prevent replicating all hangs, one step further is being able to replay against a real GPU. This is almost doable today with the missing part being able to upload the captured context image into the driver state prior to executing the uploaded hanging batch and all the buffers. To enable this last part we add a new context parameter called I915_CONTEXT_PARAM_CONTEXT_IMAGE. It follows the existing SSEU configuration pattern of being able to select which context to apply against, paired with the actual image and its size. Since this is adding a new concept of debug only uapi, we hide it behind a new kconfig option and also require activation with a module parameter. Together with a warning banner printed at driver load, all those combined should be sufficient to guard against inadvertently enabling the feature. In terms of implementation we allow the legacy context set param to be used since that removes the need to record the per context data in the proto context, while still allowing flexibility of specifying context images for any context. Mesa MR using the uapi can be seen at: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27594 v2: * Fix whitespace alignment as per checkpatch. * Added warning on userspace misuse. * Rebase for extracting ce->default_state shadowing. v3: * Rebase for I915_CONTEXT_PARAM_LOW_LATENCY. Signed-off-by: Tvrtko Ursulin Cc: Lionel Landwerlin Cc: Carlos Santa Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Tested-by: Carlos Santa Signed-off-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240514145939.87427-2-tursulin@igalia.com --- drivers/gpu/drm/i915/Kconfig.debug | 17 +++ drivers/gpu/drm/i915/gem/i915_gem_context.c | 113 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_context.c | 2 + drivers/gpu/drm/i915/gt/intel_context.h | 22 ++++ drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +- .../gpu/drm/i915/gt/intel_ring_submission.c | 3 +- drivers/gpu/drm/i915/i915_params.c | 5 + drivers/gpu/drm/i915/i915_params.h | 3 +- include/uapi/drm/i915_drm.h | 27 +++++ 10 files changed, 193 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index d8397065c3f0b..1852e08049424 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -16,6 +16,23 @@ config DRM_I915_WERROR If in doubt, say "N". +config DRM_I915_REPLAY_GPU_HANGS_API + bool "Enable GPU hang replay userspace API" + depends on DRM_I915 + depends on EXPERT + default n + help + Choose this option if you want to enable special and unstable + userspace API used for replaying GPU hangs on a running system. + + This API is intended to be used by userspace graphics stack developers + and provides no stability guarantees. + + The API needs to be activated at boot time using the + enable_debug_only_api module parameter. + + If in doubt, say "N". + config DRM_I915_DEBUG bool "Enable additional driver debugging" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 81f65cab13308..c0543c35cd6ac 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -78,6 +78,7 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" +#include "gt/shmem_utils.h" #include "pxp/intel_pxp.h" @@ -957,6 +958,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_RINGSIZE: + case I915_CONTEXT_PARAM_CONTEXT_IMAGE: default: ret = -EINVAL; break; @@ -2104,6 +2106,95 @@ static int get_protected(struct i915_gem_context *ctx, return 0; } +static int set_context_image(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct i915_gem_context_param_context_image user; + struct intel_context *ce; + struct file *shmem_state; + unsigned long lookup; + void *state; + int ret = 0; + + if (!IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API)) + return -EINVAL; + + if (!ctx->i915->params.enable_debug_only_api) + return -EINVAL; + + if (args->size < sizeof(user)) + return -EINVAL; + + if (copy_from_user(&user, u64_to_user_ptr(args->value), sizeof(user))) + return -EFAULT; + + if (user.mbz) + return -EINVAL; + + if (user.flags & ~(I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX)) + return -EINVAL; + + lookup = 0; + if (user.flags & I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX) + lookup |= LOOKUP_USER_INDEX; + + ce = lookup_user_engine(ctx, lookup, &user.engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + if (user.size < ce->engine->context_size) { + ret = -EINVAL; + goto out_ce; + } + + if (drm_WARN_ON_ONCE(&ctx->i915->drm, + test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { + /* + * This is racy but for a debug only API, if userspace is keen + * to create and configure contexts, while simultaneously using + * them from a second thread, let them suffer by potentially not + * executing with the context image they just raced to apply. + */ + ret = -EBUSY; + goto out_ce; + } + + state = kmalloc(ce->engine->context_size, GFP_KERNEL); + if (!state) { + ret = -ENOMEM; + goto out_ce; + } + + if (copy_from_user(state, u64_to_user_ptr(user.image), + ce->engine->context_size)) { + ret = -EFAULT; + goto out_state; + } + + shmem_state = shmem_create_from_data(ce->engine->name, + state, ce->engine->context_size); + if (IS_ERR(shmem_state)) { + ret = PTR_ERR(shmem_state); + goto out_state; + } + + if (intel_context_set_own_state(ce)) { + ret = -EBUSY; + fput(shmem_state); + goto out_state; + } + + ce->default_state = shmem_state; + + args->size = sizeof(user); + +out_state: + kfree(state); +out_ce: + intel_context_put(ce); + return ret; +} + static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -2156,6 +2247,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_persistence(ctx, args); break; + case I915_CONTEXT_PARAM_CONTEXT_IMAGE: + ret = set_context_image(ctx, args); + break; + case I915_CONTEXT_PARAM_PROTECTED_CONTENT: case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: @@ -2500,6 +2595,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_ENGINES: case I915_CONTEXT_PARAM_RINGSIZE: + case I915_CONTEXT_PARAM_CONTEXT_IMAGE: default: ret = -EINVAL; break; @@ -2612,5 +2708,22 @@ int __init i915_gem_context_module_init(void) if (!slab_luts) return -ENOMEM; + if (IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API)) { + pr_notice("**************************************************************\n"); + pr_notice("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_notice("** **\n"); + if (i915_modparams.enable_debug_only_api) + pr_notice("** i915.enable_debug_only_api is intended to be set **\n"); + else + pr_notice("** CONFIG_DRM_I915_REPLAY_GPU_HANGS_API builds are intended **\n"); + pr_notice("** for specific userspace graphics stack developers only! **\n"); + pr_notice("** **\n"); + pr_notice("** If you are seeing this message please report this to the **\n"); + pr_notice("** provider of your kernel build. **\n"); + pr_notice("** **\n"); + pr_notice("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_notice("**************************************************************\n"); + } + return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index a2f1245741bbf..b1b8695ba7c97 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -27,6 +27,8 @@ static void rcu_context_free(struct rcu_head *rcu) struct intel_context *ce = container_of(rcu, typeof(*ce), rcu); trace_intel_context_free(ce); + if (intel_context_has_own_state(ce)) + fput(ce->default_state); kmem_cache_free(slab_ce, ce); } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 25564c01507e4..9f523999acd1b 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -375,6 +375,28 @@ intel_context_clear_nopreempt(struct intel_context *ce) clear_bit(CONTEXT_NOPREEMPT, &ce->flags); } +#if IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API) +static inline bool intel_context_has_own_state(const struct intel_context *ce) +{ + return test_bit(CONTEXT_OWN_STATE, &ce->flags); +} + +static inline bool intel_context_set_own_state(struct intel_context *ce) +{ + return test_and_set_bit(CONTEXT_OWN_STATE, &ce->flags); +} +#else +static inline bool intel_context_has_own_state(const struct intel_context *ce) +{ + return false; +} + +static inline bool intel_context_set_own_state(struct intel_context *ce) +{ + return true; +} +#endif + u64 intel_context_get_total_runtime_ns(struct intel_context *ce); u64 intel_context_get_avg_runtime_ns(struct intel_context *ce); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 6ae8abfeccdb6..98c7f6052069c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -133,6 +133,7 @@ struct intel_context { #define CONTEXT_IS_PARKING 12 #define CONTEXT_EXITING 13 #define CONTEXT_LOW_LATENCY 14 +#define CONTEXT_OWN_STATE 15 struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d4ffb352403c1..7bd5d2c290564 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1130,7 +1130,8 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) GEM_BUG_ON(ce->state); - ce->default_state = engine->default_state; + if (!intel_context_has_own_state(ce)) + ce->default_state = engine->default_state; vma = __lrc_alloc_state(ce, engine); if (IS_ERR(vma)) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 8625e88e785ff..72277bc8322e8 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -569,7 +569,8 @@ static int ring_context_alloc(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; - ce->default_state = engine->default_state; + if (!intel_context_has_own_state(ce)) + ce->default_state = engine->default_state; /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 8c00169e3ab7b..316e55f3e87bd 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -131,6 +131,11 @@ i915_param_named_unsafe(lmem_size, uint, 0400, i915_param_named_unsafe(lmem_bar_size, uint, 0400, "Set the lmem bar size(in MiB)."); +#if IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API) +i915_param_named(enable_debug_only_api, bool, 0400, + "Enable support for unstable debug only userspace API. (default:false)"); +#endif + static void _param_print_bool(struct drm_printer *p, const char *name, bool val) { diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 2eb3f2115ff28..0fbcb5b6d7bfb 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -63,7 +63,8 @@ struct drm_printer; /* leave bools at the end to not create holes */ \ param(bool, enable_hangcheck, true, 0600) \ param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \ - param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) + param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) \ + param(bool, enable_debug_only_api, false, IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API) ? 0400 : 0) #define MEMBER(T, member, ...) T member; struct i915_params { diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d4d86e566e077..535cb68fdb5c4 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param { * supports this per context flag. */ #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2564,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. From 364e039827ef628c650c21c1afe1c54d9c3296d9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 30 Apr 2024 09:48:09 -0700 Subject: [PATCH 10/15] drm/i915/guc: avoid FIELD_PREP warning With gcc-7 and earlier, there are lots of warnings like In file included from :0:0: In function '__guc_context_policy_add_priority.isra.66', inlined from '__guc_context_set_prio.isra.67' at drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3292:3, inlined from 'guc_context_set_prio' at drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3320:2: include/linux/compiler_types.h:399:38: error: call to '__compiletime_assert_631' declared with attribute error: FIELD_PREP: mask is not constant _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ... drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:2422:3: note: in expansion of macro 'FIELD_PREP' FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ ^~~~~~~~~~ Make sure that GUC_KLV_0_KEY is an unsigned value to avoid the warning. Fixes: 77b6f79df66e ("drm/i915/guc: Update to GuC version 69.0.3") Signed-off-by: Arnd Bergmann Reviewed-by: Michal Wajdeczko Signed-off-by: Julia Filipchuk Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20240430164809.482131-1-julia.filipchuk@intel.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index bebf28e3c4794..525587cfe1af9 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -29,9 +29,9 @@ */ #define GUC_KLV_LEN_MIN 1u -#define GUC_KLV_0_KEY (0xffff << 16) -#define GUC_KLV_0_LEN (0xffff << 0) -#define GUC_KLV_n_VALUE (0xffffffff << 0) +#define GUC_KLV_0_KEY (0xffffu << 16) +#define GUC_KLV_0_LEN (0xffffu << 0) +#define GUC_KLV_n_VALUE (0xffffffffu << 0) /** * DOC: GuC Self Config KLVs From a09d2327a9ba8e3f5be238bc1b7ca2809255b464 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 17 May 2024 11:06:16 +0200 Subject: [PATCH 11/15] drm/i915/gt: Fix CCS id's calculation for CCS mode setting The whole point of the previous fixes has been to change the CCS hardware configuration to generate only one stream available to the compute users. We did this by changing the info.engine_mask that is set during device probe, reset during the detection of the fused engines, and finally reset again when choosing the CCS mode. We can't use the engine_mask variable anymore, as with the current configuration, it imposes only one CCS no matter what the hardware configuration is. Before changing the engine_mask for the third time, save it and use it for calculating the CCS mode. After the previous changes, the user reported a performance drop to around 1/4. We have tested that the compute operations, with the current patch, have improved by the same factor. Fixes: 6db31251bb26 ("drm/i915/gt: Enable only one CCS for compute workload") Signed-off-by: Andi Shyti Cc: Chris Wilson Cc: Gnattu OC Cc: Joonas Lahtinen Cc: Matt Roper Tested-by: Jian Ye Reviewed-by: Umesh Nerlige Ramappa Tested-by: Gnattu OC Link: https://patchwork.freedesktop.org/patch/msgid/20240517090616.242529-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 ++++++ drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_types.h | 8 ++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5c8e9ee3b0083..3b740ca250009 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -885,6 +885,12 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) if (IS_DG2(gt->i915)) { u8 first_ccs = __ffs(CCS_MASK(gt)); + /* + * Store the number of active cslices before + * changing the CCS engine configuration + */ + gt->ccs.cslices = CCS_MASK(gt); + /* Mask off all the CCS engine */ info->engine_mask &= ~GENMASK(CCS3, CCS0); /* Put back in the first CCS engine */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c index 99b71bb7da0a6..3c62a44e9106c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c @@ -19,7 +19,7 @@ unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt) /* Build the value for the fixed CCS load balancing */ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) { - if (CCS_MASK(gt) & BIT(cslice)) + if (gt->ccs.cslices & BIT(cslice)) /* * If available, assign the cslice * to the first available engine... diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index def7dd0eb6f19..cfdd2ad5e9549 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -207,6 +207,14 @@ struct intel_gt { [MAX_ENGINE_INSTANCE + 1]; enum intel_submission_method submission_method; + struct { + /* + * Mask of the non fused CCS slices + * to be used for the load balancing + */ + intel_engine_mask_t cslices; + } ccs; + /* * Default address space (either GGTT or ppGTT depending on arch). * From c5d86c19086fa752f0ef7ff8bca5df2dfc2fb00f Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Fri, 24 May 2024 01:58:53 +0200 Subject: [PATCH 12/15] drm/i915: Increase FLR timeout from 3s to 9s Following the guidelines it takes 3 seconds to perform an FLR reset. Let's give it a bit more slack because this time can change depending on the platform and on the firmware Signed-off-by: Andi Shyti Reviewed-by: Rodrigo Vivi Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240523235853.171796-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 729409a4bada8..2eba289d88ad1 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -2614,11 +2614,18 @@ void intel_uncore_prune_engine_fw_domains(struct intel_uncore *uncore, static void driver_initiated_flr(struct intel_uncore *uncore) { struct drm_i915_private *i915 = uncore->i915; - const unsigned int flr_timeout_ms = 3000; /* specs recommend a 3s wait */ + unsigned int flr_timeout_ms; int ret; drm_dbg(&i915->drm, "Triggering Driver-FLR\n"); + /* + * The specification recommends a 3 seconds FLR reset timeout. To be + * cautious, we will extend this to 9 seconds, three times the specified + * timeout. + */ + flr_timeout_ms = 9000; + /* * Make sure any pending FLR requests have cleared by waiting for the * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS From ca1a453361cd1cc73752998d1acd8616582c2a64 Mon Sep 17 00:00:00 2001 From: "Niemiec, Krzysztof" Date: Mon, 3 Jun 2024 18:20:22 +0200 Subject: [PATCH 13/15] drm/i915/gt: Delete the live_hearbeat_fast selftest The test is trying to push the heartbeat frequency to the limit, which might sometimes fail. Such a failure does not provide valuable information, because it does not indicate that there is something necessarily wrong with either the driver or the hardware. Remove the test to prevent random, unnecessary failures from appearing in CI. Suggested-by: Chris Wilson Signed-off-by: Niemiec, Krzysztof Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/fe2vu5h7v7ooxbhwpbfsypxg5mjrnt56gc3cgrqpnhgrgce334@qfrv2skxrp47 --- .../drm/i915/gt/selftest_engine_heartbeat.c | 110 ------------------ 1 file changed, 110 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index ef014df4c4fcc..9e4f0e417b3be 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -193,115 +193,6 @@ static int live_idle_pulse(void *arg) return err; } -static int cmp_u32(const void *_a, const void *_b) -{ - const u32 *a = _a, *b = _b; - - return *a - *b; -} - -static int __live_heartbeat_fast(struct intel_engine_cs *engine) -{ - const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6)); - struct intel_context *ce; - struct i915_request *rq; - ktime_t t0, t1; - u32 times[5]; - int err; - int i; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - intel_engine_pm_get(engine); - - err = intel_engine_set_heartbeat(engine, 1); - if (err) - goto err_pm; - - for (i = 0; i < ARRAY_SIZE(times); i++) { - do { - /* Manufacture a tick */ - intel_engine_park_heartbeat(engine); - GEM_BUG_ON(engine->heartbeat.systole); - engine->serial++; /* pretend we are not idle! */ - intel_engine_unpark_heartbeat(engine); - - flush_delayed_work(&engine->heartbeat.work); - if (!delayed_work_pending(&engine->heartbeat.work)) { - pr_err("%s: heartbeat %d did not start\n", - engine->name, i); - err = -EINVAL; - goto err_pm; - } - - rcu_read_lock(); - rq = READ_ONCE(engine->heartbeat.systole); - if (rq) - rq = i915_request_get_rcu(rq); - rcu_read_unlock(); - } while (!rq); - - t0 = ktime_get(); - while (rq == READ_ONCE(engine->heartbeat.systole)) - yield(); /* work is on the local cpu! */ - t1 = ktime_get(); - - i915_request_put(rq); - times[i] = ktime_us_delta(t1, t0); - } - - sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL); - - pr_info("%s: Heartbeat delay: %uus [%u, %u]\n", - engine->name, - times[ARRAY_SIZE(times) / 2], - times[0], - times[ARRAY_SIZE(times) - 1]); - - /* - * Ideally, the upper bound on min work delay would be something like - * 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we - * are, even with system_wq_highpri, at the mercy of the CPU scheduler - * and may be stuck behind some slow work for many millisecond. Such - * as our very own display workers. - */ - if (times[ARRAY_SIZE(times) / 2] > error_threshold) { - pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", - engine->name, - times[ARRAY_SIZE(times) / 2], - error_threshold); - err = -EINVAL; - } - - reset_heartbeat(engine); -err_pm: - intel_engine_pm_put(engine); - intel_context_put(ce); - return err; -} - -static int live_heartbeat_fast(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - /* Check that the heartbeat ticks at the desired rate. */ - if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) - return 0; - - for_each_engine(engine, gt, id) { - err = __live_heartbeat_fast(engine); - if (err) - break; - } - - return err; -} - static int __live_heartbeat_off(struct intel_engine_cs *engine) { int err; @@ -372,7 +263,6 @@ int intel_heartbeat_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(live_idle_flush), SUBTEST(live_idle_pulse), - SUBTEST(live_heartbeat_fast), SUBTEST(live_heartbeat_off), }; int saved_hangcheck; From 6ef078383a50dded4bb9e71250ea6f7b533a6109 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Tue, 28 May 2024 16:05:15 -0700 Subject: [PATCH 14/15] drm/i915/guc: Enable w/a 16021333562 for DG2, MTL and ARL Enable another workaround that is implemented inside the GuC. v2: Use the correct Gen12 w/a id rather than the Xe version (review feedback from Matthew R) also extend to include ARL. Signed-off-by: John Harrison Reviewed-by: Julia Filipchuk Link: https://patchwork.freedesktop.org/patch/msgid/20240528230515.479395-1-John.C.Harrison@Intel.com --- drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 1 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 32 ++++++++++++------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h index 525587cfe1af9..37ff539a6963d 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h @@ -106,6 +106,7 @@ enum { */ enum { GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE = 0x9001, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002, }; #endif /* _ABI_GUC_KLVS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index c606bb5e3b7b0..7995f059f30df 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -815,23 +815,23 @@ guc_capture_prep_lists(struct intel_guc *guc) return PAGE_ALIGN(total_size); } -/* Wa_14019159160 */ -static u32 guc_waklv_ra_mode(struct intel_guc *guc, u32 offset, u32 remain) +static void guc_waklv_enable_simple(struct intel_guc *guc, + u32 klv_id, u32 *offset, u32 *remain) { u32 size; u32 klv_entry[] = { /* 16:16 key/length */ - FIELD_PREP(GUC_KLV_0_KEY, GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE) | + FIELD_PREP(GUC_KLV_0_KEY, klv_id) | FIELD_PREP(GUC_KLV_0_LEN, 0), /* 0 dwords data */ }; size = sizeof(klv_entry); - GEM_BUG_ON(remain < size); + GEM_BUG_ON(*remain < size); - iosys_map_memcpy_to(&guc->ads_map, offset, klv_entry, size); - - return size; + iosys_map_memcpy_to(&guc->ads_map, *offset, klv_entry, size); + *offset += size; + *remain -= size; } static void guc_waklv_init(struct intel_guc *guc) @@ -850,11 +850,19 @@ static void guc_waklv_init(struct intel_guc *guc) remain = guc_ads_waklv_size(guc); /* Wa_14019159160 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { - size = guc_waklv_ra_mode(guc, offset, remain); - offset += size; - remain -= size; - } + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + guc_waklv_enable_simple(guc, + GUC_WORKAROUND_KLV_SERIALIZED_RA_MODE, + &offset, &remain); + + /* Wa_16021333562 */ + if ((GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 21, 1)) && + (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || + IS_MEDIA_GT_IP_RANGE(gt, IP_VER(13, 0), IP_VER(13, 0)) || + IS_DG2(gt->i915))) + guc_waklv_enable_simple(guc, + GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED, + &offset, &remain); size = guc_ads_waklv_size(guc) - remain; if (!size) From 79655e867ad6dfde2734c67c7704c0dd5bf1e777 Mon Sep 17 00:00:00 2001 From: Angus Chen Date: Fri, 24 May 2024 17:33:49 +0000 Subject: [PATCH 15/15] drm/i915/mtl: Update workaround 14018575942 The WA should be extended to cover VDBOX engine. We found that 28-channels 1080p VP9 encoding may hit this issue. v3: update the WA number and explain the reason why this workaround is needed v2: add WA number v1: initial version Signed-off-by: Angus Chen Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20240426073638.45775-1-angus.chen@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 5a0f1b279a80d..09a287c1aedd6 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1590,6 +1590,14 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB); + /* + * Wa_14018575942 + * + * Issue is seen on media KPI test running on VDBOX engine + * especially VP9 encoding WLs + */ + wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB); + /* Wa_22016670082 */ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);