From d29926fa5f580565a7240c4d6a428795c773fb76 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 6 Nov 2019 13:31:29 +0000 Subject: [PATCH 01/12] drm/i915/gt: Only drop heartbeat.systole if the sole owner Mika spotted that only using cancel_delayed_work() could mean that we attempted to clear the heartbeat.systole while the worker was still running. Rectify the situation by only touching the systole from outside the worker if we suceeded in cancelling the worker before it could run. The worker is expected to clean up by itself upon idling. Reported-by: Mika Kuoppala Fixes: 058179e72e09 ("drm/i915/gt: Replace hangcheck by heartbeats") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191106133129.17732-1-chris@chris-wilson.co.uk (cherry picked from commit 841e86728615baa77b0ea9d8b357e66052c75fe5) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 5051f304705be..06aa14c7aa8c7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -141,8 +141,8 @@ void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) void intel_engine_park_heartbeat(struct intel_engine_cs *engine) { - cancel_delayed_work(&engine->heartbeat.work); - i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); + if (cancel_delayed_work(&engine->heartbeat.work)) + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); } void intel_engine_init_heartbeat(struct intel_engine_cs *engine) From 6300c663725187f80de53631bf516971505327f2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 6 Nov 2019 14:41:55 +0000 Subject: [PATCH 02/12] drm/i915/gem: Fix error path to unlock if the GEM context is closed When inside the lock, remember to unlock even if you want to leave early. Reported-by: Dan Carpenter Fixes: a4e7ccdac38e ("drm/i915: Move context management under GEM") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191106144155.25727-1-chris@chris-wilson.co.uk (cherry picked from commit feba2b8146633390f8df44946eceb4274f7377ed) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index de6e55af82cf7..d49869b1aa10e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1136,7 +1136,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (i915_gem_context_is_closed(ctx)) { err = -ENOENT; - goto out; + goto unlock; } if (vm == rcu_access_pointer(ctx->vm)) From 56a327f983f4e66a9cbb15bcc39499dc3107c58d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 29 Oct 2019 12:39:47 +0200 Subject: [PATCH 03/12] drm/i915/display: only include intel_dp_link_training.h where needed The intel_dp_link_training.h include has no need or place in intel_display.h. Include it in intel_display.c instead. Cc: Manasi Navare Fixes: eadf6f9170d5 ("drm/i915/display/icl: Enable master-slaves in trans port sync") Reviewed-by: Manasi Navare Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20191029103947.7535-1-jani.nikula@intel.com (cherry picked from commit 3c954c418eb363343ff515756e440aa1dc216e0b) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_display.c | 1 + drivers/gpu/drm/i915/display/intel_display.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 348ce04566961..fda69172043b7 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -66,6 +66,7 @@ #include "intel_cdclk.h" #include "intel_color.h" #include "intel_display_types.h" +#include "intel_dp_link_training.h" #include "intel_fbc.h" #include "intel_fbdev.h" #include "intel_fifo_underrun.h" diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 355c50088589b..f417e0948001e 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -27,7 +27,6 @@ #include #include -#include "intel_dp_link_training.h" enum link_m_n_set; struct dpll; From 3cac195875ef5b032a3ea5d4205b0c45a6a95863 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 6 Nov 2019 22:12:23 +0000 Subject: [PATCH 04/12] drm/i915: Leave the aliasing-ppgtt size alone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hidden aliasing-ppgtt's size is never revealed, as we only inspect the front GTT when engaged. However, we were "fixing" the hidden ppgtt to match, with the net result that we ended up leaking the unused portion on Braswell were we preallocated the entire set of top level PDP, see gen8_preallocate_top_level_pdp(). [ 26.025364] DMA-API: pci 0000:00:02.0: device driver has pending DMA allocations while released from device [count=2] [ 26.025364] One of leaked entries details: [device address=0x0000000230778000] [size=4096 bytes] [mapped with DMA_BIDIRECTIONAL] [mapped as single] [ 26.025683] WARNING: CPU: 0 PID: 415 at kernel/dma/debug.c:894 dma_debug_device_change+0x1a4/0x1f0 [ 26.025905] Modules linked in: i915(E-) intel_powerclamp(E) nls_ascii(E) nls_cp437(E) crct10dif_pclmul(E) crc32_pclmul(E) vfat(E) crc32c_intel(E) fat(E) ghash_clmulni_intel(E) prime_numbers(E) intel_gtt(E) i2c_algo_bit(E) efi_pstore(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) fb_sys_fops(E) evdev(E) drm(E) aesni_intel(E) glue_helper(E) crypto_simd(E) cryptd(E) intel_cstate(E) sg(E) efivars(E) pcspkr(E) video(E) button(E) efivarfs(E) ip_tables(E) x_tables(E) autofs4(E) sd_mod(E) lpc_ich(E) ahci(E) mfd_core(E) i2c_i801(E) libahci(E) i2c_designware_pci(E) i2c_designware_core(E) [ 26.026613] CPU: 0 PID: 415 Comm: rmmod Tainted: G E 5.4.0-rc6+ #25 [ 26.026837] Hardware name: /, BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 26.027080] RIP: 0010:dma_debug_device_change+0x1a4/0x1f0 [ 26.027319] Code: 89 54 24 08 e8 ad 60 62 00 48 8b 54 24 08 48 89 c6 41 57 4d 89 e9 49 89 d8 44 89 f1 41 54 48 c7 c7 e0 61 06 82 e8 c1 aa f5 ff <0f> 0b 5a 59 48 83 3c 24 00 0f 85 97 26 00 00 8b 05 77 47 92 01 85 [ 26.027600] RSP: 0018:ffff888228d2fcc8 EFLAGS: 00010282 [ 26.027831] RAX: 0000000000000000 RBX: 0000000230778000 RCX: 0000000000000000 [ 26.028053] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffffed10451a5f8f [ 26.028279] RBP: ffff88823480c0b0 R08: 0000000000000001 R09: ffffed1046e83eb1 [ 26.028500] R10: ffffed1046e83eb0 R11: ffff88823741f587 R12: ffffffff82067340 [ 26.028725] R13: 0000000000001000 R14: 0000000000000002 R15: ffffffff82067480 [ 26.028952] FS: 00007fdf3ed174c0(0000) GS:ffff888237400000(0000) knlGS:0000000000000000 [ 26.029185] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 26.029405] CR2: 000055e211109030 CR3: 0000000230139000 CR4: 00000000001006f0 [ 26.029622] Call Trace: [ 26.029846] notifier_call_chain+0x67/0xa0 [ 26.030076] blocking_notifier_call_chain+0x5a/0x80 [ 26.030305] device_release_driver_internal+0x20d/0x260 [ 26.030535] driver_detach+0x7b/0xe1 [ 26.030761] bus_remove_driver+0x8c/0x153 [ 26.030993] pci_unregister_driver+0x2d/0xf0 [ 26.032603] i915_exit+0x16/0x1c [i915] Reported-by: Ville Syrjälä Fixes: 1eda701eace2 ("drm/i915/gtt: Recursive cleanup for gen8") References: c082afac86cb ("drm/i915: Move aliasing_ppgtt underneath its i915_ggtt") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191106221223.7437-1-chris@chris-wilson.co.uk (cherry picked from commit 2b0a4fc25ad8e3da4a156995a513dca6abf247de) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 88179202c5565..6239a9adbf14e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2609,8 +2609,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; - ppgtt->vm.total = ggtt->vm.total; - return 0; err_ppgtt: From aeec766133f99d45aad60d650de50fb382104d95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Nov 2019 19:23:49 +0200 Subject: [PATCH 05/12] drm/i915: Don't oops in dumb_create ioctl if we have no crtcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure we have a crtc before probing its primary plane's max stride. Initially I thought we can't get this far without crtcs, but looks like we can via the dumb_create ioctl. Not sure if we shouldn't disable dumb buffer support entirely when we have no crtcs, but that would require some amount of work as the only thing currently being checked is dev->driver->dumb_create which we'd have to convert to some device specific dynamic thing. Cc: stable@vger.kernel.org Reported-by: Mika Kuoppala Fixes: aa5ca8b7421c ("drm/i915: Align dumb buffer stride to 4k to allow for gtt remapping") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191106172349.11987-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit baea9ffe64200033499a4955f431e315bb807899) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index fda69172043b7..36a7cd9bc66ce 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -2529,6 +2529,9 @@ u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, * the highest stride limits of them all. */ crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); + if (!crtc) + return 0; + plane = to_intel_plane(crtc->base.primary); return plane->max_stride(plane, pixel_format, modifier, From f77021372e2880237278e0ee57faadc077a8256a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 30 Oct 2019 21:08:15 +0200 Subject: [PATCH 06/12] drm/i915: Preload LUTs if the hw isn't currently using them MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LUTs are single buffered so in order to program them without tearing we'd have to do it during vblank (actually to be 100% effective it has to happen between start of vblank and frame start). We have no proper mechanism for that at the moment so we just defer loading them after the vblank waits have happened. That is not quite sufficient (especially when committing multiple pipes whose vblanks don't line up) so the LUT load will often leak into the following frame causing tearing. However in case the hardware wasn't previously using the LUT we can preload it before setting the enable bit (which is double buffered so won't tear). Let's determine if we can do such preloading and make it happen. Slight variation between the hardware requires some platforms specifics in the checks. Hans is seeing ugly colored flash on VLV/CHV macchines (GPD win and Asus T100HA) when the gamma LUT gets loaded for the first time as the BIOS has left some junk in the LUT memory. v2: Deal with uapi vs. hw crtc state split s/GCM/CGM/ typo fix Cc: Hans de Goede Fixes: 051a6d8d3ca0 ("drm/i915: Move LUT programming to happen after vblank waits") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20191030190815.7359-1-ville.syrjala@linux.intel.com Tested-by: Hans de Goede Reviewed-by: Hans de Goede (cherry picked from commit 0ccc42a2fd5107a7f58e62c8b35b61de9a70ce82) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_atomic.c | 1 + drivers/gpu/drm/i915/display/intel_color.c | 61 +++++++++++++++++++ drivers/gpu/drm/i915/display/intel_display.c | 6 ++ .../drm/i915/display/intel_display_types.h | 1 + 4 files changed, 69 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 9cd6d2348a1ed..c2875b10adf9d 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -200,6 +200,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->update_wm_pre = false; crtc_state->update_wm_post = false; crtc_state->fifo_changed = false; + crtc_state->preload_luts = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; crtc_state->update_planes = 0; diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index fa44eb73d088d..aa3a063549c3e 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1022,6 +1022,55 @@ void intel_color_commit(const struct intel_crtc_state *crtc_state) dev_priv->display.color_commit(crtc_state); } +static bool intel_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + return !old_crtc_state->base.gamma_lut && + !old_crtc_state->base.degamma_lut; +} + +static bool chv_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + /* + * CGM_PIPE_MODE is itself single buffered. We'd have to + * somehow split it out from chv_load_luts() if we wanted + * the ability to preload the CGM LUTs/CSC without tearing. + */ + if (old_crtc_state->cgm_mode || new_crtc_state->cgm_mode) + return false; + + return !old_crtc_state->base.gamma_lut; +} + +static bool glk_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + /* + * The hardware degamma is active whenever the pipe + * CSC is active. Thus even if the old state has no + * software degamma we need to avoid clobbering the + * linear hardware degamma mid scanout. + */ + return !old_crtc_state->csc_enable && + !old_crtc_state->base.gamma_lut; +} + int intel_color_check(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); @@ -1165,6 +1214,8 @@ static int i9xx_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1217,6 +1268,8 @@ static int chv_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = chv_can_preload_luts(crtc_state); + return 0; } @@ -1271,6 +1324,8 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1328,6 +1383,8 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1366,6 +1423,8 @@ static int glk_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = glk_can_preload_luts(crtc_state); + return 0; } @@ -1415,6 +1474,8 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) crtc_state->csc_mode = icl_csc_mode(crtc_state); + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 36a7cd9bc66ce..6f5e3bd13ad18 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14205,6 +14205,11 @@ static void intel_update_crtc(struct intel_crtc *crtc, /* vblanks work again, re-enable pipe CRC. */ intel_crtc_enable_pipe_crc(crtc); } else { + if (new_crtc_state->preload_luts && + (new_crtc_state->base.color_mgmt_changed || + new_crtc_state->update_pipe)) + intel_color_load_luts(new_crtc_state); + intel_pre_plane_update(old_crtc_state, new_crtc_state); if (new_crtc_state->update_pipe) @@ -14717,6 +14722,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { if (new_crtc_state->base.active && !needs_modeset(new_crtc_state) && + !new_crtc_state->preload_luts && (new_crtc_state->base.color_mgmt_changed || new_crtc_state->update_pipe)) intel_color_load_luts(new_crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 4341bd66a4187..1a7334dbe8020 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -775,6 +775,7 @@ struct intel_crtc_state { bool disable_cxsr; bool update_wm_pre, update_wm_post; /* watermarks are updated */ bool fifo_changed; /* FIFO split is changed */ + bool preload_luts; /* Pipe source size (ie. panel fitter input size) * All planes will be positioned inside this space, From 2b3c7f0db8b4518713bad16e99271439d22dbc9d Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 11 Nov 2019 11:53:08 +0200 Subject: [PATCH 07/12] drm/i915/perf: always consider holding preemption a privileged op The ordering of the checks in the existing code can lead to holding preemption not being considered as privileged op. Signed-off-by: Lionel Landwerlin Fixes: 9cd20ef7803c ("drm/i915/perf: allow holding preemption on filtered ctx") Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191111095308.2550-1-lionel.g.landwerlin@intel.com (cherry picked from commit 0b0120d4c7b013eba59b33254febc0a6e4049e13) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_perf.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a8c2318d3d5ed..00317ea19a4af 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3307,15 +3307,6 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, } } - if (props->hold_preemption) { - if (!props->single_context) { - DRM_DEBUG("preemption disable with no context\n"); - ret = -EINVAL; - goto err; - } - privileged_op = true; - } - /* * On Haswell the OA unit supports clock gating off for a specific * context and in this mode there's no visibility of metrics for the @@ -3335,12 +3326,21 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, * doesn't request global stream access (i.e. query based sampling * using MI_RECORD_PERF_COUNT. */ - if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption) + if (IS_HASWELL(perf->i915) && specific_ctx) privileged_op = false; else if (IS_GEN(perf->i915, 12) && specific_ctx && (props->sample_flags & SAMPLE_OA_REPORT) == 0) privileged_op = false; + if (props->hold_preemption) { + if (!props->single_context) { + DRM_DEBUG("preemption disable with no context\n"); + ret = -EINVAL; + goto err; + } + privileged_op = true; + } + /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option * to determine if it's ok to access system wide OA counters From d231c15aff1eb4bde51036fb1e0aaec1706cf940 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Nov 2019 11:43:19 +0000 Subject: [PATCH 08/12] drm/i915: Protect context while grabbing its name for the request Inside print_request(), we query the context/timeline name. Nothing immediately protects the context from being freed if the request is complete -- we rely on serialisation by the caller to keep the name valid until they finish using it. Inside intel_engine_dump(), we generally only print the requests in the execution queue protected by the engine->active.lock, but we also show the pending execlists ports which are not protected and so require a rcu_read_lock to keep the pointer valid. [ 1695.700883] BUG: KASAN: use-after-free in i915_fence_get_timeline_name+0x53/0x90 [i915] [ 1695.700981] Read of size 8 at addr ffff8887344f4d50 by task gem_ctx_persist/2968 [ 1695.701068] [ 1695.701156] CPU: 1 PID: 2968 Comm: gem_ctx_persist Tainted: G U 5.4.0-rc6+ #331 [ 1695.701246] Hardware name: Intel Corporation NUC7i5BNK/NUC7i5BNB, BIOS BNKBL357.86A.0052.2017.0918.1346 09/18/2017 [ 1695.701334] Call Trace: [ 1695.701424] dump_stack+0x5b/0x90 [ 1695.701870] ? i915_fence_get_timeline_name+0x53/0x90 [i915] [ 1695.701964] print_address_description.constprop.7+0x36/0x50 [ 1695.702408] ? i915_fence_get_timeline_name+0x53/0x90 [i915] [ 1695.702856] ? i915_fence_get_timeline_name+0x53/0x90 [i915] [ 1695.702947] __kasan_report.cold.10+0x1a/0x3a [ 1695.703390] ? i915_fence_get_timeline_name+0x53/0x90 [i915] [ 1695.703836] i915_fence_get_timeline_name+0x53/0x90 [i915] [ 1695.704241] print_request+0x82/0x2e0 [i915] [ 1695.704638] ? fwtable_read32+0x133/0x360 [i915] [ 1695.705042] ? write_timestamp+0x110/0x110 [i915] [ 1695.705133] ? _raw_spin_lock_irqsave+0x79/0xc0 [ 1695.705221] ? refcount_inc_not_zero_checked+0x91/0x110 [ 1695.705306] ? refcount_dec_and_mutex_lock+0x50/0x50 [ 1695.705709] ? intel_engine_find_active_request+0x202/0x230 [i915] [ 1695.706115] intel_engine_dump+0x2c9/0x900 [i915] Fixes: c36eebd9ba5d ("drm/i915/gt: execlists->active is serialised by the tasklet") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191111114323.5833-1-chris@chris-wilson.co.uk (cherry picked from commit fecffa4668cf62e679aeea8caa9d0f241f822578) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f8113bc756c66..5ca3ec911e50f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1372,6 +1372,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } execlists_active_lock_bh(execlists); + rcu_read_lock(); for (port = execlists->active; (rq = *port); port++) { char hdr[80]; int len; @@ -1409,6 +1410,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, if (tl) intel_timeline_put(tl); } + rcu_read_unlock(); execlists_active_unlock_bh(execlists); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", From a7d87b70d6da96c6772e50728c8b4e78e4cbfd55 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Nov 2019 10:53:56 +0000 Subject: [PATCH 09/12] drm/i915/pmu: "Frequency" is reported as accumulated cycles We report "frequencies" (actual-frequency, requested-frequency) as the number of accumulated cycles so that the average frequency over that period may be determined by the user. This means the units we report to the user are Mcycles (or just M), not MHz. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191109105356.5273-1-chris@chris-wilson.co.uk (cherry picked from commit e88866ef02851c88fe95a4bb97820b94b4d46f36) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 05395015d1f2c..0d40dccd14094 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -878,8 +878,8 @@ create_event_attributes(struct i915_pmu *pmu) const char *name; const char *unit; } events[] = { - __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"), - __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"), + __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"), + __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"), __event(I915_PMU_INTERRUPTS, "interrupts", NULL), __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), }; From cee7fb437edcdb2f9f8affa959e274997f5dca4d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Nov 2019 13:32:03 +0000 Subject: [PATCH 10/12] drm/i915/userptr: Try to acquire the page lock around set_page_dirty() set_page_dirty says: For pages with a mapping this should be done under the page lock for the benefit of asynchronous memory errors who prefer a consistent dirty state. This rule can be broken in some special cases, but should be better not to. Under those rules, it is only safe for us to use the plain set_page_dirty calls for shmemfs/anonymous memory. Userptr may be used with real mappings and so needs to use the locked version (set_page_dirty_lock). However, following a try_to_unmap() we may want to remove the userptr and so call put_pages(). However, try_to_unmap() acquires the page lock and so we must avoid recursively locking the pages ourselves -- which means that we cannot safely acquire the lock around set_page_dirty(). Since we can't be sure of the lock, we have to risk skip dirtying the page, or else risk calling set_page_dirty() without a lock and so risk fs corruption. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203317 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112012 Fixes: 5cc9ed4b9a7a ("drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl") References: cb6d7c7dc7ff ("drm/i915/userptr: Acquire the page lock around set_page_dirty()") References: 505a8ec7e11a ("Revert "drm/i915/userptr: Acquire the page lock around set_page_dirty()"") References: 6dcc693bc57f ("ext4: warn when page is dirtied without buffers") Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191111133205.11590-1-chris@chris-wilson.co.uk (cherry picked from commit 0d4bbe3d407f79438dc4f87943db21f7134cfc65) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 22 ++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 1e045c3370444..4c72d74d65764 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -646,8 +646,28 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, obj->mm.dirty = false; for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) + if (obj->mm.dirty && trylock_page(page)) { + /* + * As this may not be anonymous memory (e.g. shmem) + * but exist on a real mapping, we have to lock + * the page in order to dirty it -- holding + * the page reference is not sufficient to + * prevent the inode from being truncated. + * Play safe and take the lock. + * + * However...! + * + * The mmu-notifier can be invalidated for a + * migrate_page, that is alreadying holding the lock + * on the page. Such a try_to_unmap() will result + * in us calling put_pages() and so recursively try + * to lock the page. We avoid that deadlock with + * a trylock_page() and in exchange we risk missing + * some page dirtying. + */ set_page_dirty(page); + unlock_page(page); + } mark_page_accessed(page); put_page(page); From 98ae6fb3f1dc2725449fa888b29b600a8bcab23e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Nov 2019 13:32:05 +0000 Subject: [PATCH 11/12] drm/i915/execlists: Move reset_active() from schedule-out to schedule-in The gem_ctx_persistence/smoketest was detecting an odd coherency issue inside the LRC context image; that the address of the ring buffer did not match our associated struct intel_ring. As we set the address into the context image when we pin the ring buffer into place before the context is active, that leaves the question of where did it get overwritten. Either the HW context save occurred after our pin which would imply that our idle barriers are broken, or we overwrote the context image ourselves. It is only in reset_active() where we dabble inside the context image outside of a serialised path from schedule-out; but we could equally perform the operation inside schedule-in which is then fully serialised with the context pin -- and remains serialised by the engine pulse with kill_context(). (The only downside, aside from doing more work inside the engine->active.lock, was the plan to merge all the reset paths into doing their context scrubbing on schedule-out needs more thought.) Fixes: d12acee84ffb ("drm/i915/execlists: Cancel banned contexts on schedule-out") Testcase: igt/gem_ctx_persistence/smoketest Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191111133205.11590-3-chris@chris-wilson.co.uk (cherry picked from commit 31b61f0ef9af62b6404d8df5dcd2cf58f80c9f53) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_lrc.c | 118 +++++++++++++++------------- 1 file changed, 62 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 51aef2a233cb2..0ac3b26674ad9 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -990,6 +990,59 @@ static void intel_engine_context_out(struct intel_engine_cs *engine) write_sequnlock_irqrestore(&engine->stats.lock, flags); } +static void restore_default_state(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + if (engine->pinned_default_state) + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + + execlists_init_reg_state(regs, ce, engine, ce->ring, false); +} + +static void reset_active(struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct intel_context * const ce = rq->hw_context; + u32 head; + + /* + * The executing context has been cancelled. We want to prevent + * further execution along this context and propagate the error on + * to anything depending on its results. + * + * In __i915_request_submit(), we apply the -EIO and remove the + * requests' payloads for any banned requests. But first, we must + * rewind the context back to the start of the incomplete request so + * that we do not jump back into the middle of the batch. + * + * We preserve the breadcrumbs and semaphores of the incomplete + * requests so that inter-timeline dependencies (i.e other timelines) + * remain correctly ordered. And we defer to __i915_request_submit() + * so that all asynchronous waits are correctly handled. + */ + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", + __func__, engine->name, rq->fence.context, rq->fence.seqno); + + /* On resubmission of the active request, payload will be scrubbed */ + if (i915_request_completed(rq)) + head = rq->tail; + else + head = active_request(ce->timeline, rq)->head; + ce->ring->head = intel_ring_wrap(ce->ring, head); + intel_ring_update_space(ce->ring); + + /* Scrub the context image to prevent replaying the previous batch */ + restore_default_state(ce, engine); + __execlists_update_reg_state(ce, engine); + + /* We've switched away, so this should be a no-op, but intent matters */ + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; +} + static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { @@ -998,6 +1051,9 @@ __execlists_schedule_in(struct i915_request *rq) intel_context_get(ce); + if (unlikely(i915_gem_context_is_banned(ce->gem_context))) + reset_active(rq, engine); + if (ce->tag) { /* Use a fixed tag for OA and friends */ ce->lrc_desc |= (u64)ce->tag << 32; @@ -1047,72 +1103,22 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) tasklet_schedule(&ve->base.execlists.tasklet); } -static void restore_default_state(struct intel_context *ce, - struct intel_engine_cs *engine) -{ - u32 *regs = ce->lrc_reg_state; - - if (engine->pinned_default_state) - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - - execlists_init_reg_state(regs, ce, engine, ce->ring, false); -} - -static void reset_active(struct i915_request *rq, - struct intel_engine_cs *engine) -{ - struct intel_context * const ce = rq->hw_context; - u32 head; - - /* - * The executing context has been cancelled. We want to prevent - * further execution along this context and propagate the error on - * to anything depending on its results. - * - * In __i915_request_submit(), we apply the -EIO and remove the - * requests' payloads for any banned requests. But first, we must - * rewind the context back to the start of the incomplete request so - * that we do not jump back into the middle of the batch. - * - * We preserve the breadcrumbs and semaphores of the incomplete - * requests so that inter-timeline dependencies (i.e other timelines) - * remain correctly ordered. And we defer to __i915_request_submit() - * so that all asynchronous waits are correctly handled. - */ - GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", - __func__, engine->name, rq->fence.context, rq->fence.seqno); - - /* On resubmission of the active request, payload will be scrubbed */ - if (i915_request_completed(rq)) - head = rq->tail; - else - head = active_request(ce->timeline, rq)->head; - ce->ring->head = intel_ring_wrap(ce->ring, head); - intel_ring_update_space(ce->ring); - - /* Scrub the context image to prevent replaying the previous batch */ - restore_default_state(ce, engine); - __execlists_update_reg_state(ce, engine); - - /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; -} - static inline void __execlists_schedule_out(struct i915_request *rq, struct intel_engine_cs * const engine) { struct intel_context * const ce = rq->hw_context; + /* + * NB process_csb() is not under the engine->active.lock and hence + * schedule_out can race with schedule_in meaning that we should + * refrain from doing non-trivial work here. + */ + intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put(engine->gt); - if (unlikely(i915_gem_context_is_banned(ce->gem_context))) - reset_active(rq, engine); - /* * If this is part of a virtual engine, its next request may * have been blocked waiting for access to the active context. From 789c4aea3f08026360d026c0ea69b33797ac88c2 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Wed, 13 Nov 2019 14:52:41 +0200 Subject: [PATCH 12/12] drm/i915: Split a setting of MSA to MST and SST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The setting of MSA is done by the DDI .pre_enable() hook. And when we are using MST, the MSA is only set to first mst stream by calling of DDI .pre_eanble() hook. It raies issues to non-first mst streams. Wrong MSA or missed MSA packets might show scrambled screen or wrong screen. This splits a setting of MSA to MST and SST cases. And In the MST case it will call a setting of MSA after an allocating of Virtual Channel from MST encoder pre_enable callback. Cc: Ville Syrjälä Cc: Stanislav Lisovskiy Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112212 Fixes: 0c06fa156006 ("drm/i915/dp: Add support of BT.2020 Colorimetry to DP MSA") Fixes: d4a415dcda35 ("drm/i915: Fix MST oops due to MSA changes") Signed-off-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20191106212636.502471-1-gwan-gyeong.mun@intel.com Reviewed-by: Lucas De Marchi [vsyrjala: nuke spurious newline] Signed-off-by: Ville Syrjälä (cherry picked from commit bd8c9cca88765caee0dfa93967c6d8f16b4cbfb9) Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20191113125241.20547-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 10 ++++++---- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index b51f244ad7a54..0d6e494b45082 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -1794,10 +1794,8 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, * of Color Encoding Format and Content Color Gamut] while sending * YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields * which indicate VSC SDP for the Pixel Encoding/Colorimetry Format. - * - * FIXME MST doesn't pass in the conn_state */ - if (conn_state && intel_dp_needs_vsc_sdp(crtc_state, conn_state)) + if (intel_dp_needs_vsc_sdp(crtc_state, conn_state)) temp |= DP_MSA_MISC_COLOR_VSC_SDP; I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); @@ -3605,7 +3603,11 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, else hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state); - intel_ddi_set_dp_msa(crtc_state, conn_state); + /* MST will call a setting of MSA after an allocating of Virtual Channel + * from MST encoder pre_enable callback. + */ + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) + intel_ddi_set_dp_msa(crtc_state, conn_state); } static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 9ae5b8b6bbbc4..03d1cba0b6967 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -331,6 +331,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); intel_ddi_enable_pipe_clock(pipe_config); + + intel_ddi_set_dp_msa(pipe_config, conn_state); } static void intel_mst_enable_dp(struct intel_encoder *encoder,