From 30523408c0232d4f33ca0719004e5bffaf04220c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Apr 2020 10:02:55 +0100 Subject: [PATCH 01/10] drm/i915: Avoid dereferencing a dead context Once the intel_context is closed, the GEM context may be freed and so the link from intel_context.gem_context is invalid. <3>[ 219.782944] BUG: KASAN: use-after-free in intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <3>[ 219.782996] Read of size 8 at addr ffff8881d7dff0b8 by task kworker/0:1/12 <4>[ 219.783052] CPU: 0 PID: 12 Comm: kworker/0:1 Tainted: G U 5.7.0-rc2-g1f3ffd7683d54-kasan_118+ #1 <4>[ 219.783055] Hardware name: System manufacturer System Product Name/Z170 PRO GAMING, BIOS 3402 04/26/2017 <4>[ 219.783105] Workqueue: events heartbeat [i915] <4>[ 219.783109] Call Trace: <4>[ 219.783113] <4>[ 219.783119] dump_stack+0x96/0xdb <4>[ 219.783177] ? intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <4>[ 219.783182] print_address_description.constprop.6+0x16/0x310 <4>[ 219.783239] ? intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <4>[ 219.783295] ? intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <4>[ 219.783300] __kasan_report+0x137/0x190 <4>[ 219.783359] ? intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <4>[ 219.783366] kasan_report+0x32/0x50 <4>[ 219.783426] intel_engine_coredump_alloc+0x1bc3/0x2250 [i915] <4>[ 219.783481] execlists_reset+0x39c/0x13d0 [i915] <4>[ 219.783494] ? mark_held_locks+0x9e/0xe0 <4>[ 219.783546] ? execlists_hold+0xfc0/0xfc0 [i915] <4>[ 219.783551] ? lockdep_hardirqs_on+0x348/0x5f0 <4>[ 219.783557] ? _raw_spin_unlock_irqrestore+0x34/0x60 <4>[ 219.783606] ? execlists_submission_tasklet+0x118/0x3a0 [i915] <4>[ 219.783615] tasklet_action_common.isra.14+0x13b/0x410 <4>[ 219.783623] ? __do_softirq+0x1e4/0x9a7 <4>[ 219.783630] __do_softirq+0x226/0x9a7 <4>[ 219.783643] do_softirq_own_stack+0x2a/0x40 <4>[ 219.783647] <4>[ 219.783692] ? heartbeat+0x3e2/0x10f0 [i915] <4>[ 219.783696] do_softirq.part.13+0x49/0x50 <4>[ 219.783700] __local_bh_enable_ip+0x1a2/0x1e0 <4>[ 219.783748] heartbeat+0x409/0x10f0 [i915] <4>[ 219.783801] ? __live_idle_pulse+0x9f0/0x9f0 [i915] <4>[ 219.783806] ? lock_acquire+0x1ac/0x8a0 <4>[ 219.783811] ? process_one_work+0x811/0x1870 <4>[ 219.783827] ? rcu_read_lock_sched_held+0x9c/0xd0 <4>[ 219.783832] ? rcu_read_lock_bh_held+0xb0/0xb0 <4>[ 219.783836] ? _raw_spin_unlock_irq+0x1f/0x40 <4>[ 219.783845] process_one_work+0x8ca/0x1870 <4>[ 219.783848] ? lock_acquire+0x1ac/0x8a0 <4>[ 219.783852] ? worker_thread+0x1d0/0xb80 <4>[ 219.783864] ? pwq_dec_nr_in_flight+0x2c0/0x2c0 <4>[ 219.783870] ? do_raw_spin_lock+0x129/0x290 <4>[ 219.783886] worker_thread+0x82/0xb80 <4>[ 219.783895] ? __kthread_parkme+0xaf/0x1b0 <4>[ 219.783902] ? process_one_work+0x1870/0x1870 <4>[ 219.783906] kthread+0x34e/0x420 <4>[ 219.783911] ? kthread_create_on_node+0xc0/0xc0 <4>[ 219.783918] ret_from_fork+0x3a/0x50 <3>[ 219.783950] Allocated by task 1264: <4>[ 219.783975] save_stack+0x19/0x40 <4>[ 219.783978] __kasan_kmalloc.constprop.3+0xa0/0xd0 <4>[ 219.784029] i915_gem_create_context+0xa2/0xab8 [i915] <4>[ 219.784081] i915_gem_context_create_ioctl+0x1fa/0x450 [i915] <4>[ 219.784085] drm_ioctl_kernel+0x1d8/0x270 <4>[ 219.784088] drm_ioctl+0x676/0x930 <4>[ 219.784092] ksys_ioctl+0xb7/0xe0 <4>[ 219.784096] __x64_sys_ioctl+0x6a/0xb0 <4>[ 219.784100] do_syscall_64+0x94/0x530 <4>[ 219.784103] entry_SYSCALL_64_after_hwframe+0x49/0xb3 <3>[ 219.784120] Freed by task 12: <4>[ 219.784141] save_stack+0x19/0x40 <4>[ 219.784145] __kasan_slab_free+0x130/0x180 <4>[ 219.784148] kmem_cache_free_bulk+0x1bd/0x500 <4>[ 219.784152] kfree_rcu_work+0x1d8/0x890 <4>[ 219.784155] process_one_work+0x8ca/0x1870 <4>[ 219.784158] worker_thread+0x82/0xb80 <4>[ 219.784162] kthread+0x34e/0x420 <4>[ 219.784165] ret_from_fork+0x3a/0x50 Fixes: 2e46a2a0b014 ("drm/i915: Use explicit flag to mark unreachable intel_context") Signed-off-by: Chris Wilson Acked-by: Akeem G Abodunrin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200428090255.10035-1-chris@chris-wilson.co.uk (cherry picked from commit 24aac336ff78eb55aedda043ed982c61dc3ac49a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gpu_error.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2a4cd0ba54645..5c8e51d2ba5b3 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1207,8 +1207,6 @@ static void engine_record_registers(struct intel_engine_coredump *ee) static void record_request(const struct i915_request *request, struct i915_request_coredump *erq) { - const struct i915_gem_context *ctx; - erq->flags = request->fence.flags; erq->context = request->fence.context; erq->seqno = request->fence.seqno; @@ -1219,9 +1217,13 @@ static void record_request(const struct i915_request *request, erq->pid = 0; rcu_read_lock(); - ctx = rcu_dereference(request->context->gem_context); - if (ctx) - erq->pid = pid_nr(ctx->pid); + if (!intel_context_is_closed(request->context)) { + const struct i915_gem_context *ctx; + + ctx = rcu_dereference(request->context->gem_context); + if (ctx) + erq->pid = pid_nr(ctx->pid); + } rcu_read_unlock(); } From fe5a708267911d55cce42910d93e303924b088fd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 May 2020 13:22:49 +0100 Subject: [PATCH 02/10] drm/i915/gt: Make timeslicing an explicit engine property In order to allow userspace to rely on timeslicing to reorder their batches, we must support preemption of those user batches. Declare timeslicing as an explicit property that is a combination of having the kernel support and HW support. Suggested-by: Tvrtko Ursulin Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200501122249.12417-1-chris@chris-wilson.co.uk (cherry picked from commit a211da9c771bf97395a3ced83a3aa383372b13a7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine.h | 9 --------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 18 ++++++++++++++---- drivers/gpu/drm/i915/gt/intel_lrc.c | 5 ++++- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index b469de0dd9b65..a1aa0d3e8be1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -333,13 +333,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) return intel_engine_has_preemption(engine); } -static inline bool -intel_engine_has_timeslices(const struct intel_engine_cs *engine) -{ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return false; - - return intel_engine_has_semaphores(engine); -} - #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 80cdde7128420..bf4d13e088876 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -483,10 +483,11 @@ struct intel_engine_cs { #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) -#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) -#define I915_ENGINE_IS_VIRTUAL BIT(5) -#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) -#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) +#define I915_ENGINE_HAS_TIMESLICES BIT(4) +#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5) +#define I915_ENGINE_IS_VIRTUAL BIT(6) +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8) unsigned int flags; /* @@ -584,6 +585,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_HAS_SEMAPHORES; } +static inline bool +intel_engine_has_timeslices(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return false; + + return engine->flags & I915_ENGINE_HAS_TIMESLICES; +} + static inline bool intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 683014e7bc51d..017fd5bf9c693 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4369,8 +4369,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (!intel_vgpu_active(engine->i915)) { engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + engine->flags |= I915_ENGINE_HAS_TIMESLICES; + } } if (INTEL_GEN(engine->i915) >= 12) From 421abe200321a2c907ede1a6208c558284ba0b75 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Thu, 30 Apr 2020 14:46:54 -0700 Subject: [PATCH 03/10] drm/i915: Don't enable WaIncreaseLatencyIPCEnabled when IPC is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 5a7d202b1574, a logical AND was erroneously changed to an OR, causing WaIncreaseLatencyIPCEnabled to be enabled unconditionally for kabylake and coffeelake, even when IPC is disabled. Fix the logic so that WaIncreaseLatencyIPCEnabled is only used when IPC is enabled. Fixes: 5a7d202b1574 ("drm/i915: Drop WaIncreaseLatencyIPCEnabled/1140 for cnl") Cc: stable@vger.kernel.org # 5.3.x+ Signed-off-by: Sultan Alsawaf Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200430214654.51314-1-sultan@kerneltoast.com (cherry picked from commit 690d22dafa88b82453516387b475664047a6bd14) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8375054ba27d9..a52986a9e7a68 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4992,7 +4992,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, * WaIncreaseLatencyIPCEnabled: kbl,cfl * Display WA #1141: kbl,cfl */ - if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) || + if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && dev_priv->ipc_enabled) latency += 4; From 82152d424b6cb6fc1ede7d03d69c04e786688740 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Fri, 6 Jul 2018 15:04:24 -0400 Subject: [PATCH 04/10] Make the "Reducing compressed framebufer size" message be DRM_INFO_ONCE() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was sort of annoying me: random:~$ dmesg | tail -1 [523884.039227] [drm] Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS. random:~$ dmesg | grep -c "Reducing the compressed" 47 This patch makes it DRM_INFO_ONCE() just like the similar message farther down in that function is pr_info_once(). Cc: stable@vger.kernel.org Signed-off-by: Peter Jones Acked-by: Rodrigo Vivi Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1745 Link: https://patchwork.freedesktop.org/patch/msgid/20180706190424.29194-1-pjones@redhat.com [vsyrjala: Rebase due to per-device logging] Signed-off-by: Ville Syrjälä (cherry picked from commit 6b7fc6a3e6af4ff5773949d0fed70d8e7f68d5ce) [Rodrigo: port back to DRM_INFO_ONCE] Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fbc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 2e5d835a9eaa8..c125ca9ab9b3a 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -485,8 +485,7 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, if (!ret) goto err_llb; else if (ret > 1) { - DRM_INFO("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n"); - + DRM_INFO_ONCE("Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n"); } fbc->threshold = ret; From 4457a9db2bdec2360ddb15242341696108167886 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 4 May 2020 10:58:28 +0300 Subject: [PATCH 05/10] drm/i915/tgl+: Fix interrupt handling for DP AUX transactions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unmask/enable AUX interrupts on all ports on TGL+. So far the interrupts worked only on port A, which meant each transaction on other ports took 10ms. Cc: # v5.4+ Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200504075828.20348-1-imre.deak@intel.com (cherry picked from commit 054318c7e35f1d7d06b216143fff5f32405047ee) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_irq.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d91557d842dcd..8a2b83807ffcd 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3361,7 +3361,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) | GEN8_PIPE_CDCLK_CRC_DONE; u32 de_pipe_enables; - u32 de_port_masked = GEN8_AUX_CHANNEL_A; + u32 de_port_masked = gen8_de_port_aux_mask(dev_priv); u32 de_port_enables; u32 de_misc_masked = GEN8_DE_EDP_PSR; enum pipe pipe; @@ -3369,18 +3369,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) <= 10) de_misc_masked |= GEN8_DE_MISC_GSE; - if (INTEL_GEN(dev_priv) >= 9) { - de_port_masked |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C | - GEN9_AUX_CHANNEL_D; - if (IS_GEN9_LP(dev_priv)) - de_port_masked |= BXT_DE_PORT_GMBUS; - } - - if (INTEL_GEN(dev_priv) >= 11) - de_port_masked |= ICL_AUX_CHANNEL_E; - - if (IS_CNL_WITH_PORT_F(dev_priv) || INTEL_GEN(dev_priv) >= 11) - de_port_masked |= CNL_AUX_CHANNEL_F; + if (IS_GEN9_LP(dev_priv)) + de_port_masked |= BXT_DE_PORT_GMBUS; de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN; From af23facc38c26ac188b6adac2cae2dafaca0c82d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Apr 2020 13:01:50 +0100 Subject: [PATCH 06/10] drm/i915: Check current i915_vma.pin_count status first on unbind Do an early rejection of a i915_vma_unbind() attempt if the i915_vma is currently pinned, without waiting to see if the inflight operations may unpin it. We see this problem with the shrinker trying to unbind the active vma from inside its bind worker: <6> [472.618968] Workqueue: events_unbound fence_work [i915] <4> [472.618970] Call Trace: <4> [472.618974] ? __schedule+0x2e5/0x810 <4> [472.618978] schedule+0x37/0xe0 <4> [472.618982] schedule_preempt_disabled+0xf/0x20 <4> [472.618984] __mutex_lock+0x281/0x9c0 <4> [472.618987] ? mark_held_locks+0x49/0x70 <4> [472.618989] ? _raw_spin_unlock_irqrestore+0x47/0x60 <4> [472.619038] ? i915_vma_unbind+0xae/0x110 [i915] <4> [472.619084] ? i915_vma_unbind+0xae/0x110 [i915] <4> [472.619122] i915_vma_unbind+0xae/0x110 [i915] <4> [472.619165] i915_gem_object_unbind+0x1dc/0x400 [i915] <4> [472.619208] i915_gem_shrink+0x328/0x660 [i915] <4> [472.619250] ? i915_gem_shrink_all+0x38/0x60 [i915] <4> [472.619282] i915_gem_shrink_all+0x38/0x60 [i915] <4> [472.619325] vm_alloc_page.constprop.25+0x1aa/0x240 [i915] <4> [472.619330] ? rcu_read_lock_sched_held+0x4d/0x80 <4> [472.619363] ? __alloc_pd+0xb/0x30 [i915] <4> [472.619366] ? module_assert_mutex_or_preempt+0xf/0x30 <4> [472.619368] ? __module_address+0x23/0xe0 <4> [472.619371] ? is_module_address+0x26/0x40 <4> [472.619374] ? static_obj+0x34/0x50 <4> [472.619376] ? lockdep_init_map+0x4d/0x1e0 <4> [472.619407] setup_page_dma+0xd/0x90 [i915] <4> [472.619437] alloc_pd+0x29/0x50 [i915] <4> [472.619470] __gen8_ppgtt_alloc+0x443/0x6b0 [i915] <4> [472.619503] gen8_ppgtt_alloc+0xd7/0x300 [i915] <4> [472.619535] ppgtt_bind_vma+0x2a/0xe0 [i915] <4> [472.619577] __vma_bind+0x26/0x40 [i915] <4> [472.619611] fence_work+0x1c/0x90 [i915] <4> [472.619617] process_one_work+0x26a/0x620 Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200403120150.17091-1-chris@chris-wilson.co.uk (cherry picked from commit 614654abe847a42fc75d7eb5096e46f796a438b6) (cherry picked from commit dd086cf516d9bea3878abb267f62ccc53acd764b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_vma.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 82e3bc2806224..2cd7a7e87c0a7 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1228,18 +1228,6 @@ int __i915_vma_unbind(struct i915_vma *vma) lockdep_assert_held(&vma->vm->mutex); - /* - * First wait upon any activity as retiring the request may - * have side-effects such as unpinning or even unbinding this vma. - * - * XXX Actually waiting under the vm->mutex is a hinderance and - * should be pipelined wherever possible. In cases where that is - * unavoidable, we should lift the wait to before the mutex. - */ - ret = i915_vma_sync(vma); - if (ret) - return ret; - if (i915_vma_is_pinned(vma)) { vma_print_allocator(vma, "is pinned"); return -EAGAIN; @@ -1313,15 +1301,20 @@ int i915_vma_unbind(struct i915_vma *vma) if (!drm_mm_node_allocated(&vma->node)) return 0; - if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) - /* XXX not always required: nop_clear_range */ - wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); - /* Optimistic wait before taking the mutex */ err = i915_vma_sync(vma); if (err) goto out_rpm; + if (i915_vma_is_pinned(vma)) { + vma_print_allocator(vma, "is pinned"); + return -EAGAIN; + } + + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + /* XXX not always required: nop_clear_range */ + wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); + err = mutex_lock_interruptible(&vm->mutex); if (err) goto out_rpm; From 220dcfc1485bb79e300f08a28b475869feccbb53 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Apr 2020 14:08:11 +0100 Subject: [PATCH 07/10] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the user batch or in our own preamble, the engine raises a GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so respond to a semaphore wait by yielding the timeslice, if we have another context to yield to! The only real complication is that the interrupt is only generated for the start of the semaphore wait, and is asynchronous to our process_csb() -- that is, we may not have registered the timeslice before we see the interrupt. To ensure we don't miss a potential semaphore blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark the interrupt and apply it to the next timeslice regardless of whether it was active at the time. v2: We use semaphores in preempt-to-busy, within the timeslicing implementation itself! Ergo, when we do insert a preemption due to an expired timeslice, the new context may start with the missed semaphore flagged by the retired context and be yielded, ad infinitum. To avoid this, read the context id at the time of the semaphore interrupt and only yield if that context is still active. Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Kenneth Graunke Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200407130811.17321-1-chris@chris-wilson.co.uk (cherry picked from commit c4e8ba7390346a77ffe33ec3f210bc62e0b6c8c6) (cherry picked from commit cd60e4ac4738a6921592c4f7baf87f9a3499f0e2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 +++++ drivers/gpu/drm/i915/gt/intel_gt_irq.c | 15 ++++++- drivers/gpu/drm/i915/gt/intel_lrc.c | 41 +++++++++++++++++--- drivers/gpu/drm/i915/gt/selftest_lrc.c | 34 ++++++++-------- drivers/gpu/drm/i915/i915_reg.h | 1 + 6 files changed, 82 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 3aa8a652c16db..883a9b7fe88d5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1295,6 +1295,12 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7)) drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID)); + if (HAS_EXECLISTS(dev_priv)) { + drm_printf(m, "\tEL_STAT_HI: 0x%08x\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_HI)); + drm_printf(m, "\tEL_STAT_LO: 0x%08x\n", + ENGINE_READ(engine, RING_EXECLIST_STATUS_LO)); + } drm_printf(m, "\tRING_START: 0x%08x\n", ENGINE_READ(engine, RING_START)); drm_printf(m, "\tRING_HEAD: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index bf4d13e088876..763f8f530ded2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -156,6 +156,15 @@ struct intel_engine_execlists { */ struct i915_priolist default_priolist; + /** + * @yield: CCID at the time of the last semaphore-wait interrupt. + * + * Instead of leaving a semaphore busy-spinning on an engine, we would + * like to switch to another ready context, i.e. yielding the semaphore + * timeslice. + */ + u32 yield; + /** * @error_interrupt: CS Master EIR * diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index f0e7fd95165a7..0cc7dd54f4f96 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -39,6 +39,15 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir) } } + if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) { + WRITE_ONCE(engine->execlists.yield, + ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI)); + ENGINE_TRACE(engine, "semaphore yield: %08x\n", + engine->execlists.yield); + if (del_timer(&engine->execlists.timer)) + tasklet = true; + } + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) tasklet = true; @@ -228,7 +237,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt) const u32 irqs = GT_CS_MASTER_ERROR_INTERRUPT | GT_RENDER_USER_INTERRUPT | - GT_CONTEXT_SWITCH_INTERRUPT; + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; struct intel_uncore *uncore = gt->uncore; const u32 dmask = irqs << 16 | irqs; const u32 smask = irqs << 16; @@ -366,7 +376,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt) const u32 irqs = GT_CS_MASTER_ERROR_INTERRUPT | GT_RENDER_USER_INTERRUPT | - GT_CONTEXT_SWITCH_INTERRUPT; + GT_CONTEXT_SWITCH_INTERRUPT | + GT_WAIT_SEMAPHORE_INTERRUPT; const u32 gt_interrupts[] = { irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT, irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT, diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 017fd5bf9c693..a21b962c736f8 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1754,7 +1754,8 @@ static void defer_active(struct intel_engine_cs *engine) } static bool -need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) +need_timeslice(const struct intel_engine_cs *engine, + const struct i915_request *rq) { int hint; @@ -1768,6 +1769,32 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) return hint >= effective_prio(rq); } +static bool +timeslice_yield(const struct intel_engine_execlists *el, + const struct i915_request *rq) +{ + /* + * Once bitten, forever smitten! + * + * If the active context ever busy-waited on a semaphore, + * it will be treated as a hog until the end of its timeslice (i.e. + * until it is scheduled out and replaced by a new submission, + * possibly even its own lite-restore). The HW only sends an interrupt + * on the first miss, and we do know if that semaphore has been + * signaled, or even if it is now stuck on another semaphore. Play + * safe, yield if it might be stuck -- it will be given a fresh + * timeslice in the near future. + */ + return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield); +} + +static bool +timeslice_expired(const struct intel_engine_execlists *el, + const struct i915_request *rq) +{ + return timer_expired(&el->timer) || timeslice_yield(el, rq); +} + static int switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) { @@ -1783,8 +1810,7 @@ timeslice(const struct intel_engine_cs *engine) return READ_ONCE(engine->props.timeslice_duration_ms); } -static unsigned long -active_timeslice(const struct intel_engine_cs *engine) +static unsigned long active_timeslice(const struct intel_engine_cs *engine) { const struct intel_engine_execlists *execlists = &engine->execlists; const struct i915_request *rq = *execlists->active; @@ -1946,13 +1972,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last = NULL; } else if (need_timeslice(engine, last) && - timer_expired(&engine->execlists.timer)) { + timeslice_expired(execlists, last)) { ENGINE_TRACE(engine, - "expired last=%llx:%lld, prio=%d, hint=%d\n", + "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", last->fence.context, last->fence.seqno, last->sched.attr.priority, - execlists->queue_priority_hint); + execlists->queue_priority_hint, + yesno(timeslice_yield(execlists, last))); ring_set_paused(engine, 1); defer_active(engine); @@ -2213,6 +2240,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) } clear_ports(port + 1, last_port - port); + WRITE_ONCE(execlists->yield, -1); execlists_submit_ports(engine); set_preempt_timeout(engine, *active); } else { @@ -4452,6 +4480,7 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; + engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; } static void rcs_submission_override(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 6f06ba750a0a8..f95ae15ce865c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -929,7 +929,7 @@ create_rewinder(struct intel_context *ce, goto err; } - cs = intel_ring_begin(rq, 10); + cs = intel_ring_begin(rq, 14); if (IS_ERR(cs)) { err = PTR_ERR(cs); goto err; @@ -941,8 +941,8 @@ create_rewinder(struct intel_context *ce, *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_NEQ_SDD; - *cs++ = 0; + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = idx; *cs++ = offset; *cs++ = 0; @@ -951,6 +951,11 @@ create_rewinder(struct intel_context *ce, *cs++ = offset + idx * sizeof(u32); *cs++ = 0; + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = offset; + *cs++ = 0; + *cs++ = idx + 1; + intel_ring_advance(rq, cs); rq->sched.attr.priority = I915_PRIORITY_MASK; @@ -984,7 +989,7 @@ static int live_timeslice_rewind(void *arg) for_each_engine(engine, gt, id) { enum { A1, A2, B1 }; - enum { X = 1, Y, Z }; + enum { X = 1, Z, Y }; struct i915_request *rq[3] = {}; struct intel_context *ce; unsigned long heartbeat; @@ -1017,13 +1022,13 @@ static int live_timeslice_rewind(void *arg) goto err; } - rq[0] = create_rewinder(ce, NULL, slot, 1); + rq[0] = create_rewinder(ce, NULL, slot, X); if (IS_ERR(rq[0])) { intel_context_put(ce); goto err; } - rq[1] = create_rewinder(ce, NULL, slot, 2); + rq[1] = create_rewinder(ce, NULL, slot, Y); intel_context_put(ce); if (IS_ERR(rq[1])) goto err; @@ -1041,7 +1046,7 @@ static int live_timeslice_rewind(void *arg) goto err; } - rq[2] = create_rewinder(ce, rq[0], slot, 3); + rq[2] = create_rewinder(ce, rq[0], slot, Z); intel_context_put(ce); if (IS_ERR(rq[2])) goto err; @@ -1055,15 +1060,12 @@ static int live_timeslice_rewind(void *arg) GEM_BUG_ON(!timer_pending(&engine->execlists.timer)); /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ - GEM_BUG_ON(!i915_request_is_active(rq[A1])); - GEM_BUG_ON(!i915_request_is_active(rq[A2])); - GEM_BUG_ON(!i915_request_is_active(rq[B1])); - - /* Wait for the timeslice to kick in */ - del_timer(&engine->execlists.timer); - tasklet_hi_schedule(&engine->execlists.tasklet); - intel_engine_flush_submission(engine); - + if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */ + /* Wait for the timeslice to kick in */ + del_timer(&engine->execlists.timer); + tasklet_hi_schedule(&engine->execlists.tasklet); + intel_engine_flush_submission(engine); + } /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ GEM_BUG_ON(!i915_request_is_active(rq[A1])); GEM_BUG_ON(!i915_request_is_active(rq[B1])); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e0c6021fdaf98..6e12000c4b6b5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3094,6 +3094,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GT_BSD_CS_ERROR_INTERRUPT (1 << 15) #define GT_BSD_USER_INTERRUPT (1 << 12) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */ +#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) /* bdw+ */ #define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */ #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4) From 47bf7b7a7151bad568b9523d14477a353a450066 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Apr 2020 13:53:55 +0100 Subject: [PATCH 08/10] drm/i915/gem: Remove object_is_locked assertion from unpin_from_display_plane Since moving the obj->vma.list to a spin_lock, and the vm->bound_list to its vm->mutex, along with tracking shrinkable status under its own spinlock, we no long require the object to be locked by the caller. This is fortunate as it appears we can be called with the lock along an error path in flipping: <4> [139.942851] WARN_ON(debug_locks && !lock_is_held(&(&((obj)->base.resv)->lock.base)->dep_map)) <4> [139.943242] WARNING: CPU: 0 PID: 1203 at drivers/gpu/drm/i915/gem/i915_gem_domain.c:405 i915_gem_object_unpin_from_display_plane+0x70/0x130 [i915] <4> [139.943263] Modules linked in: snd_hda_intel i915 vgem snd_hda_codec_realtek snd_hda_codec_generic coretemp snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core r8169 lpc_ich snd_pcm realtek prime_numbers [last unloaded: i915] <4> [139.943347] CPU: 0 PID: 1203 Comm: kms_flip Tainted: G U 5.6.0-gd0fda5c2cf3f1-drmtip_474+ #1 <4> [139.943363] Hardware name: /D510MO, BIOS MOPNV10J.86A.0311.2010.0802.2346 08/02/2010 <4> [139.943589] RIP: 0010:i915_gem_object_unpin_from_display_plane+0x70/0x130 [i915] <4> [139.943589] Code: 85 28 01 00 00 be ff ff ff ff 48 8d 78 60 e8 d7 9b f0 e2 85 c0 75 b9 48 c7 c6 50 b9 38 c0 48 c7 c7 e9 48 3c c0 e8 20 d4 e9 e2 <0f> 0b eb a2 48 c7 c1 08 bb 38 c0 ba 0a 01 00 00 48 c7 c6 88 a3 35 <4> [139.943589] RSP: 0018:ffffb774c0603b48 EFLAGS: 00010282 <4> [139.943589] RAX: 0000000000000000 RBX: ffff9a142fa36e80 RCX: 0000000000000006 <4> [139.943589] RDX: 000000000000160d RSI: ffff9a142c1a88f8 RDI: ffffffffa434a64d <4> [139.943589] RBP: ffff9a1410a513c0 R08: ffff9a142c1a88f8 R09: 0000000000000000 <4> [139.943589] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9a1436ee94b8 <4> [139.943589] R13: 0000000000000001 R14: 00000000ffffffff R15: ffff9a1410960000 <4> [139.943589] FS: 00007fc73a744e40(0000) GS:ffff9a143da00000(0000) knlGS:0000000000000000 <4> [139.943589] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [139.943589] CR2: 00007fc73997e098 CR3: 000000002f5fe000 CR4: 00000000000006f0 <4> [139.943589] Call Trace: <4> [139.943589] intel_pin_and_fence_fb_obj+0x1c9/0x1f0 [i915] <4> [139.943589] intel_plane_pin_fb+0x3f/0xd0 [i915] <4> [139.943589] intel_prepare_plane_fb+0x13b/0x5c0 [i915] <4> [139.943589] drm_atomic_helper_prepare_planes+0x85/0x110 <4> [139.943589] intel_atomic_commit+0xda/0x390 [i915] <4> [139.943589] drm_atomic_helper_page_flip+0x9c/0xd0 <4> [139.943589] ? drm_event_reserve_init+0x46/0x60 <4> [139.943589] drm_mode_page_flip_ioctl+0x587/0x5d0 This completes the symmetry lost in commit 8b1c78e06e61 ("drm/i915: Avoid calling i915_gem_object_unbind holding object lock"). Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1743 Fixes: 8b1c78e06e61 ("drm/i915: Avoid calling i915_gem_object_unbind holding object lock") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Andi Shyti Cc: # v5.6+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200420125356.26614-1-chris@chris-wilson.co.uk (cherry picked from commit a95f3ac21d64d62c746f836598d1467d5837fa28) (cherry picked from commit 2208b85fa1766ee4821a9435d548578b67090531) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_domain.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 0cc40e77bbd2f..4f96c8788a2ec 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -368,7 +368,6 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_vma *vma; - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); if (!atomic_read(&obj->bind_count)) return; @@ -400,12 +399,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) { - struct drm_i915_gem_object *obj = vma->obj; - - assert_object_held(obj); - /* Bump the LRU to try and avoid premature eviction whilst flipping */ - i915_gem_object_bump_inactive_ggtt(obj); + i915_gem_object_bump_inactive_ggtt(vma->obj); i915_vma_unpin(vma); } From 53b2622e774681943230fb9f31096512fff99fe7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Apr 2020 19:47:49 +0100 Subject: [PATCH 09/10] drm/i915/execlists: Avoid reusing the same logical CCID The bspec is confusing on the nature of the upper 32bits of the LRC descriptor. Once upon a time, it said that it uses the upper 32b to decide if it should perform a lite-restore, and so we must ensure that each unique context submitted to HW is given a unique CCID [for the duration of it being on the HW]. Currently, this is achieved by using a small circular tag, and assigning every context submitted to HW a new id. However, this tag is being cleared on repinning an inflight context such that we end up re-using the 0 tag for multiple contexts. To avoid accidentally clearing the CCID in the upper 32bits of the LRC descriptor, split the descriptor into two dwords so we can update the GGTT address separately from the CCID. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1796 Fixes: 2935ed5339c4 ("drm/i915: Remove logical HW ID") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v5.5+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200428184751.11257-1-chris@chris-wilson.co.uk (cherry picked from commit 2632f174a2e1a5fd40a70404fa8ccfd0b1f79ebd) (cherry picked from commit a4b70fcc587860f4b972f68217d8ebebe295ec15) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_context_types.h | 8 ++- drivers/gpu/drm/i915/gt/intel_engine_types.h | 5 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 50 ++++++++----------- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 4 +- drivers/gpu/drm/i915/i915_perf.c | 3 +- 6 files changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 07cb83a0d0171..ca0d4f4f3615b 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -69,7 +69,13 @@ struct intel_context { #define CONTEXT_NOPREEMPT 7 u32 *lrc_reg_state; - u64 lrc_desc; + union { + struct { + u32 lrca; + u32 ccid; + }; + u64 desc; + } lrc; u32 tag; /* cookie passed to HW to track this context on submission */ /* Time on GPU as tracked by the hw. */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 763f8f530ded2..8dd210a2c340a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -156,6 +156,11 @@ struct intel_engine_execlists { */ struct i915_priolist default_priolist; + /** + * @ccid: identifier for contexts submitted to this engine + */ + u32 ccid; + /** * @yield: CCID at the time of the last semaphore-wait interrupt. * diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a21b962c736f8..e8b02f84aa3d0 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -456,10 +456,10 @@ assert_priority_queue(const struct i915_request *prev, * engine info, SW context ID and SW counter need to form a unique number * (Context ID) per lrc. */ -static u64 +static u32 lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) { - u64 desc; + u32 desc; desc = INTEL_LEGACY_32B_CONTEXT; if (i915_vm_is_4lvl(ce->vm)) @@ -470,21 +470,7 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) if (IS_GEN(engine->i915, 8)) desc |= GEN8_CTX_L3LLC_COHERENT; - desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */ - /* - * The following 32bits are copied into the OA reports (dword 2). - * Consider updating oa_get_render_ctx_id in i915_perf.c when changing - * anything below. - */ - if (INTEL_GEN(engine->i915) >= 11) { - desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; - /* bits 48-53 */ - - desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT; - /* bits 61-63 */ - } - - return desc; + return i915_ggtt_offset(ce->state) | desc; } static inline unsigned int dword_in_page(void *addr) @@ -1192,7 +1178,7 @@ static void reset_active(struct i915_request *rq, __execlists_update_reg_state(ce, engine, head); /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; } static u32 intel_context_get_runtime(const struct intel_context *ce) @@ -1251,18 +1237,19 @@ __execlists_schedule_in(struct i915_request *rq) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) execlists_check_context(ce, engine); - ce->lrc_desc &= ~GENMASK_ULL(47, 37); if (ce->tag) { /* Use a fixed tag for OA and friends */ - ce->lrc_desc |= (u64)ce->tag << 32; + ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - ce->lrc_desc |= - (u64)(++engine->context_tag % NUM_CONTEXT_TAG) << - GEN11_SW_CTX_ID_SHIFT; + ce->lrc.ccid = + (++engine->context_tag % NUM_CONTEXT_TAG) << + (GEN11_SW_CTX_ID_SHIFT - 32); BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); } + ce->lrc.ccid |= engine->execlists.ccid; + __intel_gt_pm_get(engine->gt); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); intel_engine_context_in(engine); @@ -1361,7 +1348,7 @@ execlists_schedule_out(struct i915_request *rq) static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->context; - u64 desc = ce->lrc_desc; + u64 desc = ce->lrc.desc; u32 tail, prev; /* @@ -1400,7 +1387,7 @@ static u64 execlists_update_context(struct i915_request *rq) */ wmb(); - ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE; + ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; return desc; } @@ -1785,7 +1772,7 @@ timeslice_yield(const struct intel_engine_execlists *el, * safe, yield if it might be stuck -- it will be given a fresh * timeslice in the near future. */ - return upper_32_bits(rq->context->lrc_desc) == READ_ONCE(el->yield); + return rq->context->lrc.ccid == READ_ONCE(el->yield); } static bool @@ -3071,7 +3058,7 @@ __execlists_context_pin(struct intel_context *ce, if (IS_ERR(vaddr)) return PTR_ERR(vaddr); - ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; + ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine, ce->ring->tail); @@ -3100,7 +3087,7 @@ static void execlists_context_reset(struct intel_context *ce) ce, ce->engine, ce->ring, true); __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; } static const struct intel_context_ops execlists_context_ops = { @@ -3781,7 +3768,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) head, ce->ring->tail); __execlists_reset_reg_state(ce, engine); __execlists_update_reg_state(ce, engine, head); - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ + ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ unwind: /* Push back any incomplete requests for replay after the reset. */ @@ -4548,6 +4535,11 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) else execlists->csb_size = GEN11_CSB_ENTRIES; + if (INTEL_GEN(engine->i915) >= 11) { + execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); + execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); + } + reset_csb_pointers(engine); /* Finally, take ownership and responsibility for cleanup! */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index fe7778c28d2d7..aa6d56e25a10a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -217,7 +217,7 @@ static void guc_wq_item_append(struct intel_guc *guc, static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = lower_32_bits(rq->context->lrc_desc); + u32 ctx_desc = rq->context->lrc.ccid; u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); guc_wq_item_append(guc, engine->guc_id, ctx_desc, diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index cb11c31840857..6eb6710b35e9b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -290,7 +290,7 @@ static void shadow_context_descriptor_update(struct intel_context *ce, struct intel_vgpu_workload *workload) { - u64 desc = ce->lrc_desc; + u64 desc = ce->lrc.desc; /* * Update bits 0-11 of the context descriptor which includes flags @@ -300,7 +300,7 @@ shadow_context_descriptor_update(struct intel_context *ce, desc |= (u64)workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - ce->lrc_desc = desc; + ce->lrc.desc = desc; } static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 66a46e41d5ef9..b5030192be3ef 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1310,8 +1310,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * dropped by GuC. They won't be part of the context * ID in the OA reports, so squash those lower bits. */ - stream->specific_ctx_id = - lower_32_bits(ce->lrc_desc) >> 12; + stream->specific_ctx_id = ce->lrc.lrca >> 12; /* * GuC uses the top bit to signal proxy submission, so From 1bc6a60143a4f9264cc6e09ceb9919f4e813a872 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Apr 2020 19:47:50 +0100 Subject: [PATCH 10/10] drm/i915/execlists: Track inflight CCID The presumption is that by using a circular counter that is twice as large as the maximum ELSP submission, we would never reuse the same CCID for two inflight contexts. However, if we continually preempt an active context such that it always remains inflight, it can be resubmitted with an arbitrary number of paired contexts. As each of its paired contexts will use a new CCID, eventually it will wrap and submit two ELSP with the same CCID. Rather than use a simple circular counter, switch over to a small bitmap of inflight ids so we can avoid reusing one that is still potentially active. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1796 Fixes: 2935ed5339c4 ("drm/i915: Remove logical HW ID") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v5.5+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200428184751.11257-2-chris@chris-wilson.co.uk (cherry picked from commit 5c4a53e3b1cbc38d0906e382f1037290658759bb) (cherry picked from commit 134711240307d5586ae8e828d2699db70a8b74f2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 3 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 29 +++++++++++++++----- drivers/gpu/drm/i915/i915_perf.c | 3 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 8dd210a2c340a..0be674ae1cf63 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -309,8 +309,7 @@ struct intel_engine_cs { u32 context_size; u32 mmio_base; - unsigned int context_tag; -#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS) + unsigned long context_tag; struct rb_node uabi_node; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index e8b02f84aa3d0..77420372d8134 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1239,13 +1239,17 @@ __execlists_schedule_in(struct i915_request *rq) if (ce->tag) { /* Use a fixed tag for OA and friends */ + GEM_BUG_ON(ce->tag <= BITS_PER_LONG); ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - ce->lrc.ccid = - (++engine->context_tag % NUM_CONTEXT_TAG) << - (GEN11_SW_CTX_ID_SHIFT - 32); - BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID); + unsigned int tag = ffs(engine->context_tag); + + GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); + clear_bit(tag - 1, &engine->context_tag); + ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32); + + BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); } ce->lrc.ccid |= engine->execlists.ccid; @@ -1289,7 +1293,8 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) static inline void __execlists_schedule_out(struct i915_request *rq, - struct intel_engine_cs * const engine) + struct intel_engine_cs * const engine, + unsigned int ccid) { struct intel_context * const ce = rq->context; @@ -1307,6 +1312,14 @@ __execlists_schedule_out(struct i915_request *rq, i915_request_completed(rq)) intel_engine_add_retire(engine, ce->timeline); + ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; + ccid &= GEN12_MAX_CONTEXT_HW_ID; + if (ccid < BITS_PER_LONG) { + GEM_BUG_ON(ccid == 0); + GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); + set_bit(ccid - 1, &engine->context_tag); + } + intel_context_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); @@ -1332,15 +1345,17 @@ execlists_schedule_out(struct i915_request *rq) { struct intel_context * const ce = rq->context; struct intel_engine_cs *cur, *old; + u32 ccid; trace_i915_request_out(rq); + ccid = rq->context->lrc.ccid; old = READ_ONCE(ce->inflight); do cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; while (!try_cmpxchg(&ce->inflight, &old, cur)); if (!cur) - __execlists_schedule_out(rq, old); + __execlists_schedule_out(rq, old, ccid); i915_request_put(rq); } @@ -3556,7 +3571,7 @@ static void enable_execlists(struct intel_engine_cs *engine) enable_error_interrupt(engine); - engine->context_tag = 0; + engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); } static bool unexpected_starting_state(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index b5030192be3ef..cf2c01f17da83 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1327,11 +1327,10 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); /* * Pick an unused context id - * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts + * 0 - BITS_PER_LONG are used by other contexts * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context */ stream->specific_ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32); - BUILD_BUG_ON((GEN12_MAX_CONTEXT_HW_ID - 1) < NUM_CONTEXT_TAG); break; } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 58b5f40a07dd6..af89c7fc8f593 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -173,7 +173,7 @@ static int igt_vma_create(void *arg) } nc = 0; - for_each_prime_number(num_ctx, 2 * NUM_CONTEXT_TAG) { + for_each_prime_number(num_ctx, 2 * BITS_PER_LONG) { for (; nc < num_ctx; nc++) { ctx = mock_context(i915, "mock"); if (!ctx)